/// <summary>
/// Returns oscillations using the DCT.
/// </summary>
public static void GetOscillationUsingDct(double[] array, double framesPerSecond, double[,] cosines, out double oscilFreq, out double period, out double intensity)
{
    var modifiedArray = DataTools.SubtractMean(array);
    var dctCoeff = MFCCStuff.DCT(modifiedArray, cosines);

    // convert to absolute values because not interested in negative values due to phase.
    for (int i = 0; i < dctCoeff.Length; i++)
    {
        dctCoeff[i] = Math.Abs(dctCoeff[i]);
    }

    // remove low freq oscillations from consideration
    int thresholdIndex = dctCoeff.Length / 5;
    for (int i = 0; i < thresholdIndex; i++)
    {
        dctCoeff[i] = 0.0;
    }

    dctCoeff = DataTools.normalise2UnitLength(dctCoeff);
    //dct = DataTools.NormaliseMatrixValues(dctCoeff); //another option to NormaliseMatrixValues

    int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

    // recalculate DCT duration in seconds
    double dctDuration = dctCoeff.Length / framesPerSecond;
    oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
    period = 2 * dctCoeff.Length / (double)indexOfMaxValue / framesPerSecond; //convert maxID to period in seconds
    intensity = dctCoeff[indexOfMaxValue];
}
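// A quick sanity check of the frequency and period formulas above. The numbers are hypothetical
// (not taken from any recording or recognizer config); the arithmetic is copied from the method body.
// double framesPerSecond = 100;   // assumed frame rate
// int dctLength = 128;            // assumed DCT length in frames
// int indexOfMaxValue = 12;       // assumed index of the maximum DCT coefficient
// double dctDuration = dctLength / framesPerSecond;                          // = 1.28 s
// double oscilFreq = indexOfMaxValue / dctDuration * 0.5;                    // ≈ 4.7 oscillations per second
// double period = 2 * dctLength / (double)indexOfMaxValue / framesPerSecond; // ≈ 0.21 s, i.e. ≈ 1 / oscilFreq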
public void TestDecibelSpectrogram()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));

    // specified linear scale
    var freqScale = new FrequencyScale(nyquist: 11025, frameSize: 1024, hertzGridInterval: 1000);
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // DO EQUALITY TEST on the AMPLITUDE SONOGRAM DATA
    // Do not bother with the image because this is only an amplitude spectrogram.
    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

    // DO FILE EQUALITY TEST on the DECIBEL SONOGRAM DATA
    // Do not bother with the image because this has been tested elsewhere.
    var decibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);
    var expectedFile = PathHelper.ResolveAsset("StandardSonograms", "BAC2_20071008_DecibelSonogramData.EXPECTED.bin");

    // run this once to generate expected test data
    // uncomment this to update the binary data. Should be rarely needed
    // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
    //Binary.Serialize(expectedFile, decibelSonogram);

    var expected = Binary.Deserialize<double[,]>(expectedFile);
    CollectionAssert.That.AreEqual(expected, decibelSonogram, EnvelopeAndFftTests.Delta);
}
//##################################################################################################################################

/// <summary>
/// NOTE!!!! The decibel array has been normalised to [0, 1].
/// </summary>
protected static Tuple<double[,], double[]> MakeCepstrogram(SonogramConfig config, double[,] matrix, double[] decibels, int sampleRate)
{
    double[,] m = matrix;
    int nyquist = sampleRate / 2;
    double epsilon = config.epsilon;
    bool includeDelta = config.mfccConfig.IncludeDelta;
    bool includeDoubleDelta = config.mfccConfig.IncludeDoubleDelta;

    //Log.WriteIfVerbose(" MakeCepstrogram(matrix, decibels, includeDelta=" + includeDelta + ", includeDoubleDelta=" + includeDoubleDelta + ")");

    //(i) APPLY FILTER BANK
    int bandCount = config.mfccConfig.FilterbankCount;
    bool doMelScale = config.mfccConfig.DoMelScale;
    int ccCount = config.mfccConfig.CcCount;
    int fftBinCount = config.FreqBinCount; //number of Hz bands = 2^N + 1. Subtract DC bin
    int minHz = config.MinFreqBand ?? 0;
    int maxHz = config.MaxFreqBand ?? nyquist;

    Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1));

    //error check that filterBankCount < FFTbins
    if (bandCount > fftBinCount)
    {
        throw new Exception(
            "## FATAL ERROR in BaseSonogram.MakeCepstrogram():- Can't calculate cepstral coeff. FilterbankCount > FFTbins. ("
            + bandCount + " > " + fftBinCount + ")\n\n");
    }

    //this is the filter count for full bandwidth 0-Nyquist. This number is trimmed proportionately to fit the required bandwidth.
    if (doMelScale)
    {
        m = MFCCStuff.MelFilterBank(m, bandCount, nyquist, minHz, maxHz); // using the Greg integral
    }
    else
    {
        m = MFCCStuff.LinearFilterBank(m, bandCount, nyquist, minHz, maxHz);
    }

    Log.WriteIfVerbose("\tDim after filter bank=" + m.GetLength(1) + " (Max filter bank=" + bandCount + ")");

    //(ii) CONVERT AMPLITUDES TO DECIBELS
    m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon); //from spectrogram

    //(iii) NOISE REDUCTION
    var tuple1 = SNR.NoiseReduce(m, config.NoiseReductionType, config.NoiseReductionParameter);
    m = tuple1.Item1;

    //(iv) calculate cepstral coefficients
    m = MFCCStuff.Cepstra(m, ccCount);

    //(v) NormaliseMatrixValues
    m = DataTools.normalise(m);

    //(vi) Calculate the full range of MFCC coefficients i.e. including decibel and deltas, etc
    m = MFCCStuff.AcousticVectors(m, decibels, includeDelta, includeDoubleDelta);
    var tuple2 = Tuple.Create(m, tuple1.Item2);

    return tuple2; // return matrix and full bandwidth modal noise profile
}
public void SonogramDecibelMethodsAreEquivalent()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));

    // specified linear scale
    var freqScale = new FrequencyScale(nyquist: 11025, frameSize: 1024, hertzGridInterval: 1000);
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // Method 1
    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    var expectedDecibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);

    // Method 2: make sure that the decibel spectrum is the same no matter which path we take to calculate it.
    var actualDecibelSpectrogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    CollectionAssert.That.AreEqual(expectedDecibelSonogram, actualDecibelSpectrogram.Data, EnvelopeAndFftTests.Delta);
}
/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE sub-band of a spectrogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram.
/// Developed for GenericRecognizer of harmonics.
/// WARNING: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise.
/// But it requires that the frequency of any potential formants is not changing rapidly.
/// This may not be suitable for detecting human speech. However, the frame step can be reduced.
/// </summary>
/// <param name="m">spectrogram data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
{
    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    var binCount = m.GetLength(1);

    //set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // set up arrays to store decibels, formant intensity and max index.
    var dBArray = new double[rowCount];
    var intensity = new double[rowCount];
    var maxIndexArray = new int[rowCount];

    // for all time frames
    for (int t = 2; t < rowCount - 2; t++)
    {
        // get average of five adjacent frames
        var frame1 = MatrixTools.GetRow(m, t - 2);
        var frame2 = MatrixTools.GetRow(m, t - 1);
        var frame3 = MatrixTools.GetRow(m, t);
        var frame4 = MatrixTools.GetRow(m, t + 1);
        var frame5 = MatrixTools.GetRow(m, t + 2);
        var frame = new double[colCount];
        for (int i = 0; i < colCount; i++)
        {
            frame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5;
        }

        double maxValue = frame.Max();
        dBArray[t] = maxValue;
        if (maxValue < dBThreshold)
        {
            continue;
        }

        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

        // xr has twice length of frame and is symmetrical.
        // Require only first half.
        double[] normXr = new double[colCount];
        for (int i = 0; i < colCount; i++)
        {
            // Would normally normalise the xcorr values for overlap count.
            // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values.
            //normXr[i] = xr[i] / (colCount - i);
            normXr[i] = xr[i];
        }

        // now do DCT across the auto cross xr
        int lowerDctBound = 2;
        var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
        int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
        intensity[t] = dctCoefficients[indexOfMaxValue];
        maxIndexArray[t] = indexOfMaxValue;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
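// The warning above about rapidly changing formants can be made concrete with hypothetical framing
// parameters (these are illustrative only, not values taken from any recognizer config):
// with a 512-sample frame and no overlap at a 22050 Hz sample rate, the frame step is about 23 ms,
// so averaging five adjacent frames smears each spectrum over roughly 5 x 23 ms ≈ 116 ms.
// Any formant that moves appreciably within that window will be blurred; reducing the frame step
// shortens the smearing window proportionally.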
/// <summary>
/// WARNING: calculation of k1 and k2 is faulty.
/// MinDecibelReference should not be used i.e. k1 = EndpointDetectionConfiguration.SegmentationThresholdK1;
/// See the alternative below.
///
/// ************* PARAMETERS FOR:- ENDPOINT DETECTION of VOCALISATIONS
/// See Lamel et al 1981.
/// They use k1, k2, k3 and k4, minimum pulse length and k1_k2Latency.
/// Here we set k1 = k3, k4 = k2, k1_k2Latency = 0.186 s (5 frames)
/// and "minimum pulse length" = 0.075 s (2 frames).
/// SEGMENTATION_THRESHOLD_K1 = decibels above the minimum level
/// SEGMENTATION_THRESHOLD_K2 = decibels above the minimum level
/// K1_K2_LATENCY = seconds delay between signal reaching k1 and k2 thresholds
/// VOCAL_DELAY = seconds delay required to separate vocalisations
/// MIN_VOCAL_DURATION = minimum length of energy pulse - do not use this - accept all pulses.
/// SEGMENTATION_THRESHOLD_K1 = 3.5
/// SEGMENTATION_THRESHOLD_K2 = 6.0
/// K1_K2_LATENCY = 0.05
/// VOCAL_DELAY = 0.2.
/// </summary>
public static int[] DetermineVocalisationEndpoints(double[] dbArray, double frameStep)
{
    var k1k2Delay = (int)(K1K2Latency / frameStep);      //=5 frames delay between signal reaching k1 and k2 thresholds
    var frameGap = (int)(VocalGap / frameStep);          //=10 frames delay required to separate vocalisations
    var minPulse = (int)(MinPulseDuration / frameStep);  //=2 frames is min vocal length
    return MFCCStuff.VocalizationDetection(dbArray, K1Threshold, K2Threshold, k1k2Delay, frameGap, minPulse, null);
}
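// Taking the figures quoted in the summary at face value (a worked example; the frame step here is an
// assumed value, not a project constant): with a frame step of about 0.037 s,
//   k1k2Delay = (int)(0.186 / 0.037) = 5 frames, and
//   minPulse  = (int)(0.075 / 0.037) = 2 frames,
// matching the "=5 frames" and "=2 frames" inline comments above.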
public void SonogramDecibelMethodsAreEquivalent()
{
    // Method 1
    var sonogram = new AmplitudeSonogram(this.sonoConfig, this.recording.WavReader);
    var expectedDecibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);

    // Method 2: make sure that the decibel spectrum is the same no matter which path we take to calculate it.
    var actualDecibelSpectrogram = new SpectrogramStandard(this.sonoConfig, this.recording.WavReader);

    CollectionAssert.That.AreEqual(expectedDecibelSonogram, actualDecibelSpectrogram.Data, EnvelopeAndFftTests.Delta);
}
private double[,] SobelEdgegram(double[,] matrix)
{
    double[,] m = MFCCStuff.DecibelSpectra(matrix, this.Configuration.WindowPower, this.SampleRate, this.Configuration.epsilon); //from spectrogram
    //double[,] m = Speech.DecibelSpectra(matrix);

    //NOISE REDUCTION
    var output = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);
    this.SnrData.ModalNoiseProfile = output.Item2;
    return ImageTools.SobelEdgeDetection(output.Item1);
}
public static double[,] GetDecibelSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
{
    int frameStep = frameSize;

    // get decibel spectrogram
    var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);
    var spectrogram = MFCCStuff.DecibelSpectra(results.AmplitudeSpectrogram, results.WindowPower, recording.SampleRate, recording.Epsilon);

    // remove background noise from spectrogram
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(spectrogram);
    spectrogram = SNR.TruncateBgNoiseFromSpectrogram(spectrogram, spectralDecibelBgn);
    spectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogram, nhThreshold: 3.0);
    return spectrogram;
}
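// A minimal usage sketch (assumes it is called from within the same class; the test asset and the
// frame size of 512 are borrowed from the tests elsewhere in this collection and are arbitrary choices):
// var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
// double[,] noiseReducedDecibels = GetDecibelSpectrogramNoiseReduced(recording, frameSize: 512);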
/// <summary>
/// The data passed to this method must be the Spectral sonogram.
/// </summary>
public static Tuple<double[,], double[]> GetCepstrogram(double[,] data, int minHz, int maxHz, int freqBinCount, double freqBinWidth, bool doMelScale, int ccCount)
{
    ImageTools.DrawMatrix(data, @"C:\SensorNetworks\Output\MFCC_LewinsRail\tempImage1.jpg", false);
    double[,] m = SpectrogramTools.ExtractFreqSubband(data, minHz, maxHz, doMelScale, freqBinCount, freqBinWidth);
    ImageTools.DrawMatrix(m, @"C:\SensorNetworks\Output\MFCC_LewinsRail\tempImage2.jpg", false);

    //DO NOT DO NOISE REDUCTION BECAUSE ALREADY DONE
    //double[] modalNoise = SNR.CalculateModalNoise(m, 7); //calculate modal noise profile and smooth
    //m = SNR.NoiseReduce_Standard(m, modalNoise);
    //m = SNR.NoiseReduce_FixedRange(m, this.Configuration.DynamicRange);

    m = MFCCStuff.Cepstra(m, ccCount);
    m = DataTools.normalise(m);
    ImageTools.DrawMatrix(m, @"C:\SensorNetworks\Output\MFCC_LewinsRail\tempImage3.jpg", false);

    return Tuple.Create(m, (double[])null);
}
/// <summary> /// Initializes a new instance of the <see cref="DecibelSpectrogram"/> class. /// </summary> public DecibelSpectrogram(AmplitudeSpectrogram amplitudeSpectrogram) { this.Configuration = amplitudeSpectrogram.Configuration; this.Attributes = amplitudeSpectrogram.Attributes; // (ii) CONVERT AMPLITUDES TO DECIBELS this.Data = MFCCStuff.DecibelSpectra(amplitudeSpectrogram.Data, this.Attributes.WindowPower, this.Attributes.SampleRate, this.Attributes.Epsilon); // (iii) NOISE REDUCTION var tuple = SNR.NoiseReduce(this.Data, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter); this.Data = tuple.Item1; // store data matrix if (this.SnrData != null) { this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile } }
/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the sonogram.
/// Developed for GenericRecognizer of harmonics.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    double[] dBArray = new double[rowCount];
    var intensity = new double[rowCount];      //an array of formant intensity
    var maxIndexArray = new int[rowCount];     //an array of max value index values
    var binCount = m.GetLength(1);
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount); //set up the cosine coefficients

    // for all time frames
    for (int t = 0; t < rowCount; t++)
    {
        var frame = MatrixTools.GetRow(m, t);
        double maxValue = frame.Max();
        dBArray[t] = maxValue;
        if (maxValue < dBThreshold)
        {
            continue;
        }

        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

        // xr has twice length of frame and is symmetrical.
        // Require only first half. Also need to normalise the values for overlap count.
        double[] normXr = new double[colCount];
        for (int i = 0; i < colCount; i++)
        {
            normXr[i] = xr[i] / (colCount - i);
        }

        // now do DCT across the auto cross xr
        int lowerDctBound = 2;
        var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
        int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
        intensity[t] = dctCoefficients[indexOfMaxValue];
        maxIndexArray[t] = indexOfMaxValue;
    } // frames = rows of matrix

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
/// <summary> /// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class. /// </summary> public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav) { this.Configuration = config; this.Attributes = new SpectrogramAttributes(); double minDuration = 1.0; if (wav.Time.TotalSeconds < minDuration) { LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration); return; } //set attributes for the current recording and spectrogram type this.Attributes.SampleRate = wav.SampleRate; this.Attributes.Duration = wav.Time; this.Attributes.NyquistFrequency = wav.SampleRate / 2; this.Attributes.Duration = wav.Time; this.Attributes.MaxAmplitude = wav.CalculateMaximumAmplitude(); this.Attributes.FrameDuration = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate); var recording = new AudioRecording(wav); var fftdata = DSP_Frames.ExtractEnvelopeAndFfts( recording, config.WindowSize, config.WindowOverlap, this.Configuration.WindowFunction); // now recover required data //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value. this.Attributes.Epsilon = fftdata.Epsilon; this.Attributes.WindowPower = fftdata.WindowPower; this.Attributes.FrameCount = fftdata.FrameCount; this.Data = fftdata.AmplitudeSpectrogram; // IF REQUIRED CONVERT TO MEL SCALE if (this.Configuration.DoMelScale) { // this mel scale conversion uses the "Greg integral" ! this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency); } }
public void TestDecibelSpectrogram()
{
    // DO EQUALITY TEST on the AMPLITUDE SONOGRAM DATA
    // Do not bother with the image because this is only an amplitude spectrogram.
    var sonogram = new AmplitudeSonogram(this.sonoConfig, this.recording.WavReader);

    // DO FILE EQUALITY TEST on the DECIBEL SONOGRAM DATA
    // Do not bother with the image because this has been tested elsewhere.
    var decibelSonogram = MFCCStuff.DecibelSpectra(sonogram.Data, sonogram.Configuration.WindowPower, sonogram.SampleRate, sonogram.Configuration.epsilon);
    var expectedFile = PathHelper.ResolveAsset("StandardSonograms", "BAC2_20071008_DecibelSonogramData.EXPECTED.bin");

    // run this once to generate expected test data
    // uncomment this to update the binary data. Should be rarely needed
    // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
    //Binary.Serialize(expectedFile, decibelSonogram);

    var expected = Binary.Deserialize<double[,]>(expectedFile);
    CollectionAssert.That.AreEqual(expected, decibelSonogram, EnvelopeAndFftTests.Delta);
}
public void TestStandardNoiseRemoval()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
    int windowSize = 512;
    var sr = recording.SampleRate;

    // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
    double windowOverlap = 0.0;
    var windowFunction = WindowFunctions.HAMMING.ToString();

    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        windowSize,
        windowOverlap,
        windowFunction);

    // Now recover the data
    // The following data is required when constructing sonograms
    //var duration = recording.WavReader.Time;
    //var frameCount = fftdata.FrameCount;
    //var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;

    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(fftdata.AmplitudeSpectrogram, fftdata.WindowPower, sr, fftdata.Epsilon);

    // The following call to NoiseProfile.CalculateBackgroundNoise(double[,] spectrogram)
    // returns a noise profile that is used as the BGN spectral index.
    // It calculates the modal background noise for each frequency bin and then returns a smoothed version.
    // By default, the number of SDs = 0 and the smoothing window = 7.
    // Method assumes that the passed spectrogram is oriented as: rows=frames, cols=freq bins.
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);

    var resourcesDir = PathHelper.ResolveAssetPath("Indices");
    var expectedSpectrumFile = new FileInfo(resourcesDir + "\\NoiseProfile.bin");

    //Binary.Serialize(expectedSpectrumFile, spectralDecibelBgn);

    var expectedVector = Binary.Deserialize<double[]>(expectedSpectrumFile);
    CollectionAssert.That.AreEqual(expectedVector, spectralDecibelBgn, 0.000_000_001);
}
} // LocalPeaks()

/// <summary>
/// Calculates SPECTRAL PEAK TRACKS: spectralIndices.SPT, RHZ, RVT, RPS, RNG.
/// This method is only called from IndexCalculate.Analysis() when the IndexCalculation Duration is less than 10 seconds,
/// because need to recalculate background noise etc.
/// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
/// NOTE: We require a noise reduced decibel spectrogram.
/// FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth.
/// </summary>
public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
{
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;
    int bufferFrameCount = 2; // 2 because must allow for edge effects when using 5x5 grid to find ridges.
    int ridgeBuffer = frameSize * bufferFrameCount;
    var ridgeRecording = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
    int frameStep = frameSize;
    var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);

    // i: generate the ridge SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram
    double[,] decibelSpectrogram;
    if (octaveScale)
    {
        var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
        decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale);
    }
    else
    {
        decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);
    }

    // calculate the noise profile
    var spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    double nhDecibelThreshold = 2.0; // SPECTRAL dB THRESHOLD for smoothing background
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhDecibelThreshold);

    // thresholds in decibels
    // double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    // TimeSpan frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    var sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
    return sptInfo;
}
public static double[] DoDct(double[] vector, double[,] cosines, int lowerDctBound)
{
    //var dctArray = DataTools.Vector2Zscores(dctArray);
    var dctArray = DataTools.SubtractMean(vector);
    int dctLength = dctArray.Length;
    double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines);

    // convert to absolute values because not interested in negative values due to phase.
    for (int i = 0; i < dctLength; i++)
    {
        dctCoeff[i] = Math.Abs(dctCoeff[i]);
    }

    // remove lower coefficients from consideration because they dominate
    for (int i = 0; i < lowerDctBound; i++)
    {
        dctCoeff[i] = 0.0;
    }

    dctCoeff = DataTools.normalise2UnitLength(dctCoeff);
    return dctCoeff;
}
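// A minimal usage sketch (assumes it is called from within the same class; the input vector is an
// arbitrary placeholder - in practice it is a frame of the autocorrelation function, as in the
// harmonic detectors above):
// double[] vector = { 0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6 }; // placeholder signal
// double[,] cosines = MFCCStuff.Cosines(vector.Length, vector.Length);
// double[] dctCoeff = DoDct(vector, cosines, lowerDctBound: 2);
// int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff); // index of the dominant periodicity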
} //end CONSTRUCTOR

public override void Make(double[,] amplitudeM)
{
    double[,] m = amplitudeM;

    // (i) IF REQUIRED CONVERT TO FULL BAND WIDTH MEL SCALE
    // Make sure you have Configuration.MelBinCount somewhere
    if (this.Configuration.DoMelScale)
    {
        m = MFCCStuff.MelFilterBank(m, this.Configuration.MelBinCount, this.NyquistFrequency, 0, this.NyquistFrequency); // using the Greg integral
    }

    // (ii) CONVERT AMPLITUDES TO DECIBELS
    m = MFCCStuff.DecibelSpectra(m, this.Configuration.WindowPower, this.SampleRate, this.Configuration.epsilon);

    // (iii) NOISE REDUCTION
    var tuple = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);
    this.Data = tuple.Item1; // store data matrix

    if (this.SnrData != null)
    {
        this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile
    }
}
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// </summary>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaBicolorConfig lbConfig,
    bool drawDebugImage,
    TimeSpan segmentStartOffset)
{
    double decibelThreshold = lbConfig.DecibelThreshold; //dB
    double intensityThreshold = lbConfig.IntensityThreshold;
    //double eventThreshold = lbConfig.EventThreshold; //in 0-1

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 3 * lbConfig.MaxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);

    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

    // Could smooth here rather than above. Above seemed slightly better?
    amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lbConfig.LowerBandMinHz,
        lbConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        lbConfig.MinDuration,
        lbConfig.MaxDuration,
        segmentStartOffset);

    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // init the score array
    double[] scores = new double[rowCount];

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // add names into the returned events
    foreach (var ae in predictedEvents)
    {
        //rowtop, rowWidth
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
        {
            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);

            bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;
            //Console.WriteLine($"step={i} period={period:f4}");

            if (!periodWithinBounds)
            {
                continue;
            }

            // lay down score for sample length
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = "L.b";
            ae.Score_MaxInEvent = maximumIntensity;
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################

    // calculate the cosine similarity scores
    var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

    //DEBUG IMAGE this recognizer only. MUST set false for deployment.
    Image debugImage = null;
    if (drawDebugImage)
    {
        // display a variety of debug score arrays
        //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
        //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
        //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
        DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };

        // other debug plots
        //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
        debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for easier interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
/// <summary>
/// Calculate summary statistics for supplied temporal and spectral targets.
/// </summary>
/// <remarks>
/// The acoustic statistics calculated in this method are based on methods outlined in
/// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
/// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z. Fern, Raviv Raich, Sarah J. K. Hadley, Adam S. Hadley, Matthew G. Betts, et al.
/// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
/// ..
/// The Briggs features are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
/// 1. Gini Index for frame and bin sums. A measure of dispersion. The problem with the Gini index is that its value depends on the row or column count.
///    We use entropy instead, because it is normalised and its value therefore does not depend on the row or column count.
/// For the following measures of k-central moments, the freq and time values are normalised to [0, 1] across the width of the event.
/// 2. freq-mean
/// 3. freq-variance
/// 4. freq-skew and kurtosis
/// 5. time-mean
/// 6. time-variance
/// 7. time-skew and kurtosis
/// 8. freq-max (normalised)
/// 9. time-max (normalised)
/// 10. Briggs et al also calculate a 16-value histogram of gradients for each event mask. We do not do that here, although we could.
/// ...
/// NOTE 1: There are differences between our method of noise reduction and Briggs'. Briggs does not convert to decibels
/// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
/// NOTE 2: To normalise for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
/// There are advantages and disadvantages to Briggs' method versus ours. In our case, we have to convert decibel values back to
/// energy values when calculating the statistics for the extracted acoustic event.
/// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
/// Only mean and standard deviation.
/// ..
/// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
/// AND that the passed recording is of sufficient duration to obtain a reliable BGN noise profile,
/// BUT not so long as to cause memory constipation.
/// </remarks>
/// <param name="recording">as type AudioRecording which contains the event.</param>
/// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording.</param>
/// <param name="spectralTarget">both bottom and top bounds in Hertz.</param>
/// <param name="config">parameters that determine the outcome of the analysis.</param>
/// <param name="segmentStartOffset">How long since the start of the recording this event occurred.</param>
/// <returns>an instance of EventStatistics.</returns>
public static EventStatistics AnalyzeAudioEvent(
    AudioRecording recording,
    Range<TimeSpan> temporalTarget,
    Range<double> spectralTarget,
    EventStatisticsConfiguration config,
    TimeSpan segmentStartOffset)
{
    var stats = new EventStatistics
    {
        EventStartSeconds = temporalTarget.Minimum.TotalSeconds,
        EventEndSeconds = temporalTarget.Maximum.TotalSeconds,
        LowFrequencyHertz = spectralTarget.Minimum,
        HighFrequencyHertz = spectralTarget.Maximum,
        SegmentDurationSeconds = recording.Duration.TotalSeconds,
        SegmentStartSeconds = segmentStartOffset.TotalSeconds,
    };

    // temporal target is supplied relative to recording, but not the supplied audio segment
    // shift coordinates relative to segment
    var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

    if (!recording
        .Duration
        .AsRangeFromZero(Topology.Inclusive)
        .Contains(localTemporalTarget))
    {
        stats.Error = true;
        stats.ErrorMessage = $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";
        return stats;
    }

    // convert recording to spectrogram
    int sampleRate = recording.SampleRate;
    double epsilon = recording.Epsilon;

    // extract the spectrogram
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

    double hertzBinWidth = dspOutput1.FreqBinWidth;
    var stepDurationInSeconds = config.FrameStep / (double)sampleRate;
    var startFrame = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

    // subtract 1 frame because want to end before start of end point.
    var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

    var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
    var topBin = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

    // Events can have their high value set to the nyquist.
    // Since the submatrix call below uses an inclusive upper bound, an index out of bounds exception occurs in
    // these cases. So we just ask for the bin below.
    if (topBin >= config.FrameSize / 2)
    {
        topBin = (config.FrameSize / 2) - 1;
    }

    // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // extract the required acoustic event
    var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

    // Get the SNR of the event.
    // This is just the max value in the matrix, because the matrix has already been noise reduced.
    MatrixTools.MinMax(eventMatrix, out _, out double max);
    stats.SnrDecibels = max;

    // Now need to convert event matrix back to energy values before calculating other statistics
    eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

    var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
    var rowAverages = MatrixTools.GetRowAverages(eventMatrix);

    // calculate the mean and temporal standard deviation in decibels
    NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
    stats.MeanDecibels = 10 * Math.Log10(mean);
    stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate the frequency standard deviation in decibels
    NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
    stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate relative location of the temporal maximum
    int maxRowId = DataTools.GetMaxIndex(rowAverages);
    stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

    // calculate the entropy dispersion/concentration indices
    stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
    stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

    // calculate the spectral centroid and the dominant frequency
    double binCentroid = CalculateSpectralCentroid(columnAverages);
    stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
    int maxColumnId = DataTools.GetMaxIndex(columnAverages);
    stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

    // remainder of this method is to produce debugging images. Can comment out when not debugging.
    /*
    var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
    var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
    string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
    image4.Save(path4);
    normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
    image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
    path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
    image4.Save(path4);
    */

    return stats;
}
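// The 10 * Math.Log10 conversions above, and NOTE 2 in the remarks (dividing energy by noise being
// equivalent to subtraction in decibels), both rest on the same logarithm identity (a reminder, not
// project code): 10 * log10(E / N) = 10 * log10(E) - 10 * log10(N).
// For example, a mean power of 0.01 converts to 10 * log10(0.01) = -20 dB.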
public static Tuple<double[]> Execute_MFCC_XCOR(double[,] target, double dynamicRange, SpectrogramStandard sonogram, List<AcousticEvent> segments, int minHz, int maxHz, double minDuration)
{
    Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
    if (segments == null)
    {
        return null;
    }

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);
    int targetLength = target.GetLength(0);

    //set up the matrix of cosine coefficients
    int coeffCount = 12; //only use first 12 coefficients.
    int binCount = target.GetLength(1); //number of filters in filter bank
    double[,] cosines = MFCCStuff.Cosines(binCount, coeffCount + 1); //set up the cosine coefficients

    //adjust target's dynamic range to that set by user
    target = SNR.SetDynamicRange(target, 3.0, dynamicRange); //set event's dynamic range
    target = MFCCStuff.Cepstra(target, coeffCount, cosines);
    double[] v1 = DataTools.Matrix2Array(target);
    v1 = DataTools.normalise2UnitLength(v1);

    string imagePath2 = @"C:\SensorNetworks\Output\FELT_Currawong\target.png";
    var result1 = BaseSonogram.Data2ImageData(target);
    var image = result1.Item1;
    ImageTools.DrawMatrix(image, 1, 1, imagePath2);

    double[] scores = new double[sonogram.FrameCount];
    foreach (AcousticEvent av in segments)
    {
        Log.WriteLine("SEARCHING SEGMENT.");
        int startRow = (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond);
        int endRow = (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond);
        if (endRow >= sonogram.FrameCount)
        {
            endRow = sonogram.FrameCount - 1;
        }

        endRow -= targetLength;
        if (endRow <= startRow)
        {
            endRow = startRow + 1; //want minimum of one row
        }

        for (int r = startRow; r < endRow; r++)
        {
            double[,] matrix = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
            matrix = SNR.SetDynamicRange(matrix, 3.0, dynamicRange); //set event's dynamic range

            //string imagePath2 = @"C:\SensorNetworks\Output\FELT_Gecko\compare.png";
            //var image = BaseSonogram.Data2ImageData(matrix);
            //ImageTools.DrawMatrix(image, 1, 1, imagePath2);

            matrix = MFCCStuff.Cepstra(matrix, coeffCount, cosines);
            double[] v2 = DataTools.Matrix2Array(matrix);
            v2 = DataTools.normalise2UnitLength(v2);
            double crossCor = DataTools.DotProduct(v1, v2);
            scores[r] = crossCor;
        } //end of rows in segment
    } //foreach (AcousticEvent av in segments)

    var tuple = Tuple.Create(scores);
    return tuple;
}
/// <summary>
/// THIS METHOD NO LONGER IN USE.
/// NOT USEFUL FOR ANIMAL CALLS.
/// Tried this, but it is suitable only when there is a guarantee of numerous spectral tracks, as in the vowels of human speech.
/// It yields SPURIOUS RESULTS where there is only one whistle track.
/// </summary>
public static double[,] DetectHarmonicsUsingDCT(double[,] matrix, int minBin, int maxBin, int hzWidth, bool normaliseDCT, int minPeriod, int maxPeriod, double dctThreshold)
{
    int dctLength = maxBin - minBin + 1; //DCT spans N freq bins

    int minIndex = (int)(hzWidth / (double)maxPeriod * 2); //Times 2 because index = Pi and not 2Pi
    int maxIndex = (int)(hzWidth / (double)minPeriod * 2); //Times 2 because index = Pi and not 2Pi

    //double period = hzWidth / (double)indexOfMaxValue * 2; //Times 2 because index = Pi and not 2Pi
    if (maxIndex > dctLength)
    {
        maxIndex = dctLength; //safety check in case of future changes to code.
    }

    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);
    double[,] hits = new double[rows, cols];
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients

    for (int r = 0; r < rows - dctLength; r++)
    {
        //for (int c = minBin; c <= minBin; c++) //traverse columns - skip DC column
        //{
        var array = new double[dctLength];

        //accumulate J rows of values
        for (int i = 0; i < dctLength; i++)
        {
            for (int j = 0; j < 5; j++)
            {
                array[i] += matrix[r + j, minBin + i];
            }
        }

        array = DataTools.SubtractMean(array);
        // DataTools.writeBarGraph(array);

        double[] dct = MFCCStuff.DCT(array, cosines);
        for (int i = 0; i < dctLength; i++)
        {
            dct[i] = Math.Abs(dct[i]); //convert to absolute values
        }

        for (int i = 0; i < 5; i++)
        {
            dct[i] = 0.0; //remove low freq values from consideration
        }

        if (normaliseDCT)
        {
            dct = DataTools.normalise2UnitLength(dct);
        }

        int indexOfMaxValue = DataTools.GetMaxIndex(dct);
        //DataTools.writeBarGraph(dct);

        double period = hzWidth / (double)indexOfMaxValue * 2; //Times 2 because index = Pi and not 2Pi

        //mark DCT location with harmonic freq, only if harmonic freq is in correct range and amplitude
        if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold)
        {
            for (int i = 0; i < dctLength; i++)
            {
                hits[r, minBin + i] = period;
            }

            for (int i = 0; i < dctLength; i++)
            {
                hits[r + 1, minBin + i] = period; //alternate row
            }
        }

        //c += 5; //skip columns
        //}
        r++; //do alternate row
    }

    return hits;
}
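// Worked example of the period formula above (hypothetical numbers): with hzWidth = 1000 and the
// maximum DCT coefficient at index 8, period = 1000 / 8 * 2 = 250 Hz between harmonics.
// Inverting the same relation gives the index bounds, e.g. a maxPeriod of 500 Hz yields
// minIndex = (int)(1000 / 500 * 2) = 4.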
public static Image<Rgb24> GetSonogramImage(double[,] data, int nyquistFreq, int maxFrequency, bool doMelScale, int binHeight, bool doHighlightSubband, int subBandMinHz, int subBandMaxHz)
{
    int width = data.GetLength(0); // Number of spectra in sonogram
    int fftBins = data.GetLength(1);
    int maxBin = (int)Math.Floor(fftBins * maxFrequency / (double)nyquistFreq);

    int imageHeight = maxBin * binHeight; // image ht = sonogram ht. Later include grid and score scales

    //set up min, max, range for normalising of dB values
    DataTools.MinMax(data, out double min, out double max);
    double range = max - min;

    // readjust min and max to create the effect of contrast stretching. It enhances the spectrogram a bit
    double fractionalStretching = 0.01;
    min = min + (range * fractionalStretching);
    max = max - (range * fractionalStretching);
    range = max - min;

    //int? minHighlightFreq = this.subBand_MinHz;
    //int? maxHighlightFreq = this.subBand_MaxHz;
    //int minHighlightBin = (minHighlightFreq == null) ? 0 : (int)Math.Round((double)minHighlightFreq / (double)NyquistFrequency * fftBins);
    //int maxHighlightBin = (maxHighlightFreq == null) ? 0 : (int)Math.Round((double)maxHighlightFreq / (double)NyquistFrequency * fftBins);

    //calculate top and bottom of sub-band
    int minHighlightBin = (int)Math.Round(subBandMinHz / (double)nyquistFreq * fftBins);
    int maxHighlightBin = (int)Math.Round(subBandMaxHz / (double)nyquistFreq * fftBins);

    if (doMelScale)
    {
        double maxMel = MFCCStuff.Mel(nyquistFreq);
        int melRange = (int)(maxMel - 0 + 1);
        double pixelPerMel = imageHeight / (double)melRange;
        double minBandMel = MFCCStuff.Mel(subBandMinHz);
        double maxBandMel = MFCCStuff.Mel(subBandMaxHz);
        minHighlightBin = (int)Math.Round(minBandMel * pixelPerMel);
        maxHighlightBin = (int)Math.Round(maxBandMel * pixelPerMel);
    }

    Color[] grayScale = ImageTools.GrayScale();
    var bmp = new Image<Rgb24>(width, imageHeight);
    int yOffset = imageHeight;

    // for all freq bins
    for (int y = 0; y < maxBin; y++)
    {
        //repeat this bin if cepstral image
        for (int r = 0; r < binHeight; r++)
        {
            // for all pixels in line
            for (int x = 0; x < width; x++)
            {
                // normalise and bound the value - use min bound, max and 255 image intensity range
                double value = (data[x, y] - min) / range;
                int c = 255 - (int)Math.Floor(255.0 * value); //original version
                if (c < 0)
                {
                    c = 0;
                }
                else if (c >= 256)
                {
                    c = 255;
                }

                int g = c + 40; // green tinge used in the template scan band
                if (g >= 256)
                {
                    g = 255;
                }

                var col = doHighlightSubband && IsInBand(y, minHighlightBin, maxHighlightBin)
                    ? Color.FromRgb((byte)c, (byte)g, (byte)c)
                    : grayScale[c];
                bmp[x, yOffset - 1] = col;
            }

            yOffset--;
        } //end repeats over one track
    }

    return bmp;
}
} // end method ConvertODScores2Events()

/*
 * public static double PeakEntropy(double[] array)
 * {
 *     bool[] peaks = DataTools.GetPeaks(array);
 *     int peakCount = DataTools.CountTrues(peaks);
 *
 *     //set up histogram of peak energies
 *     double[] histogram = new double[peakCount];
 *     int count = 0;
 *     for (int k = 0; k < array.Length; k++)
 *     {
 *         if (peaks[k])
 *         {
 *             histogram[count] = array[k];
 *             count++;
 *         }
 *     }
 *
 *     histogram = DataTools.NormaliseMatrixValues(histogram);
 *     histogram = DataTools.Normalise2Probabilites(histogram);
 *     double normFactor = Math.Log(histogram.Length) / DataTools.ln2; //normalize for length of the array
 *     double entropy = DataTools.Entropy(histogram) / normFactor;
 *     return entropy;
 * }
 */

/// <summary>
/// Returns the periodicity in an array of values.
/// </summary>
public static double[] PeriodicityAnalysis(double[] array)
{
    //DataTools.writeBarGraph(array);
    var A = AutoAndCrossCorrelation.MyCrossCorrelation(array, array); // do 2/3rds of maximum possible lag
    int dctLength = A.Length;
    A = DataTools.SubtractMean(A);
    //DataTools.writeBarGraph(A);

    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
    double[] dct = MFCCStuff.DCT(A, cosines);

    for (int i = 0; i < dctLength; i++)
    {
        dct[i] = Math.Abs(dct[i]); //convert to absolute values
    }

    //DataTools.writeBarGraph(dct);

    for (int i = 0; i < 3; i++)
    {
        dct[i] = 0.0; //remove low freq oscillations from consideration
    }

    dct = DataTools.normalise2UnitLength(dct);
    var peaks = DataTools.GetPeaks(dct);

    // remove non-peak values and low values
    for (int i = 0; i < dctLength; i++)
    {
        if (!peaks[i] || dct[i] < 0.2)
        {
            dct[i] = 0.0;
        }
    }

    DataTools.writeBarGraph(dct);

    //get periodicity of highest three values
    int peakCount = 3;
    var period = new double[peakCount];
    var maxIndex = new double[peakCount];
    for (int i = 0; i < peakCount; i++)
    {
        int indexOfMaxValue = DataTools.GetMaxIndex(dct);
        maxIndex[i] = indexOfMaxValue;

        //double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
        if ((double)indexOfMaxValue == 0)
        {
            period[i] = 0.0;
        }
        else
        {
            period[i] = dctLength / (double)indexOfMaxValue * 2;
        }

        dct[indexOfMaxValue] = 0.0; // remove value for next iteration
    }

    LoggedConsole.WriteLine("Max indices = {0:f0}, {1:f0}, {2:f0}.", maxIndex[0], maxIndex[1], maxIndex[2]);
    return period;
}
/// <summary>
/// ################ THE KEY ANALYSIS METHOD for TRILLS.
///
/// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
/// </summary>
/// <param name="recording"></param>
/// <param name="sonoConfig"></param>
/// <param name="lwConfig"></param>
/// <param name="returnDebugImage"></param>
/// <param name="segmentStartOffset"></param>
/// <returns></returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaWatjulumConfig lwConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    double intensityThreshold = lwConfig.IntensityThreshold;
    double minDuration = lwConfig.MinDurationOfTrill; // seconds
    double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
    double minPeriod = lwConfig.MinPeriod; // seconds
    double maxPeriod = lwConfig.MaxPeriod; // seconds

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 4 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    //int colCount = sonogram.Data.GetLength(1);

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);

    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

    // Could smooth here rather than above. Above seemed slightly better?
    //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
    var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        lwConfig.DecibelThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // LOOK FOR TRILL EVENTS
    // init the score array
    double[] scores = new double[rowCount];

    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // add names into the returned events
    foreach (var ae in predictedTrillEvents)
    {
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
        {
            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            double oscilFreq;
            double period;
            double intensity;
            Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

            bool periodWithinBounds = period > minPeriod && period < maxPeriod;
            //Console.WriteLine($"step={i} period={period:f4}");

            if (!periodWithinBounds)
            {
                continue;
            }

            //lay down score for sample length
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
            ae.Score_MaxInEvent = maximumIntensity;
            ae.Profile = lwConfig.ProfileNames[0];
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################
    // LOOK FOR TINK EVENTS
    // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
    double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

    // want stronger threshold for tink because brief.
    double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
    var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        tinkDecibelThreshold,
        minDurationOfTink,
        maxDurationOfTink,
        segmentStartOffset);

    foreach (var ae2 in predictedTinkEvents)
    {
        // Prune the list of potential acoustic events, for example using Cosine Similarity.
        //rowtop, rowWidth
        //int eventStart = ae2.Oblong.RowTop;
        //int eventWidth = ae2.Oblong.RowWidth;
        //int step = 2;
        //double maximumIntensity = 0.0;

        // add abbreviatedSpeciesName into event
        //if (maximumIntensity >= intensityThreshold)
        //{
        ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";
        //ae2.Score_MaxInEvent = maximumIntensity;
        ae2.Profile = lwConfig.ProfileNames[1];
        confirmedEvents.Add(ae2);
        //}
    }

    //######################################################################

    var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        double[] normalisedScores;
        double normalisedThreshold;
        DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
        debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for easier interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
public static void Main(Arguments arguments)
{
    //1. set up the necessary files
    //DirectoryInfo diSource = arguments.Source.Directory;
    FileInfo fiSourceRecording = arguments.Source;
    FileInfo fiConfig = arguments.Config.ToFileInfo();
    FileInfo fiImage = arguments.Output.ToFileInfo();
    fiImage.CreateParentDirectories();

    string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING";
    string date = "# DATE AND TIME: " + DateTime.Now;
    LoggedConsole.WriteLine(title);
    LoggedConsole.WriteLine(date);
    LoggedConsole.WriteLine("# Input audio file: " + fiSourceRecording.Name);
    LoggedConsole.WriteLine("# Output image file: " + fiImage);

    //2. get the config dictionary
    Config configuration = ConfigFile.Deserialize(fiConfig);

    //below three lines are examples of retrieving info from Config config
    //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName];
    //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false;
    //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold;

    //3. transfer sonogram parameters to a dictionary to be passed around
    var configDict = new Dictionary<string, string>();

    // #Resample rate must be 2 X the desired Nyquist. Default is that of recording.
    configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString();
    configDict["FrameLength"] = configuration[AnalysisKeys.FrameLength] ?? "512";
    int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512;

    // #Frame Overlap as fraction: default=0.0
    configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0";
    double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0;

    // #MinHz: 500
    // #MaxHz: 3500
    // #NOISE REDUCTION PARAMETERS
    configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true";
    configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? "3.0";
    configDict["ADD_AXES"] = configuration["ADD_AXES"] ?? "true";
    configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true";

    // 3: GET RECORDING
    var startOffsetMins = TimeSpan.Zero;
    var endOffsetMins = TimeSpan.Zero;

    FileInfo fiOutputSegment = fiSourceRecording;
    if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero))
    {
        var buffer = new TimeSpan(0, 0, 0);
        fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav"));

        //This method extracts segment and saves to disk at the location fiOutputSegment.
        var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ??
AppConfigHelper.DefaultTargetSampleRate; AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment); } var recording = new AudioRecording(fiOutputSegment.FullName); // EXTRACT ENVELOPE and SPECTROGRAM var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap); // average absolute value over the minute recording ////double[] avAbsolute = dspOutput.Average; // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ################################## // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds); // double totalSeconds = wavDuration.TotalSeconds; // double[] signalEnvelope = dspOutput.Envelope; // double avSignalEnvelope = signalEnvelope.Average(); // double[] frameEnergy = dspOutput.FrameEnergy; // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds; // double binWidth = dspOutput.BinWidth; // int nyquistBin = dspOutput.NyquistBin; // dspOutput.WindowPower, // dspOutput.FreqBinWidth int nyquistFreq = dspOutput.NyquistFreq; double epsilon = recording.Epsilon; // i: prepare amplitude spectrogram double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram. var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData)); // ii: prepare decibel spectrogram prior to noise removal double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon); decibelSpectrogramdata = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata); var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata)); // iii: Calculate background noise spectrum in decibels // Calculate noise value for each freq bin. double sdCount = 0.0; // number of SDs above the mean for noise removal var decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount); // DataTools.writeBarGraph(dBProfile.NoiseMode); // iv: Prepare noise reduced spectrogram decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds); //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold); var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata)); // prepare new sonogram config and draw second image going down different code pathway var config = new SonogramConfig { MinFreqBand = 0, MaxFreqBand = 10000, NoiseReductionType = SNR.KeyToNoiseReductionType("Standard"), NoiseReductionParameter = 1.0, WindowSize = frameSize, WindowOverlap = windowOverlap, }; //var mfccConfig = new MfccConfiguration(config); int bandCount = config.mfccConfig.FilterbankCount; bool doMelScale = config.mfccConfig.DoMelScale; int ccCount = config.mfccConfig.CcCount; int fftBins = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band int minHz = config.MinFreqBand ?? 0; int maxHz = config.MaxFreqBand ?? nyquistFreq; var standardSonogram = new SpectrogramStandard(config, recording.WavReader); var image4 = standardSonogram.GetImage(); // TODO next line crashes - does not produce cepstral sonogram. 
//SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader); //Image image5 = cepSng.GetImage(); //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict); //var image = mti.GetImage(); //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config); //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict); //prepare sonogram images var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false)); protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond)); protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width)); protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram)); var image6 = protoImage6.GetImage(); var list = new List <Image <Rgb24> >(); list.Add(image1); // amplitude spectrogram list.Add(image2); // decibel spectrogram before noise removal list.Add(image3); // decibel spectrogram after noise removal list.Add(image4); // second version of noise reduced spectrogram //list.Add(image5); // ceptral sonogram list.Add(image6.CloneAs <Rgb24>()); // multitrack image Image finalImage = ImageTools.CombineImagesVertically(list); finalImage.Save(fiImage.FullName); ////2: NOISE REMOVAL //double[,] originalSg = sonogram.Data; //double[,] mnr = sonogram.Data; //mnr = ImageTools.WienerFilter(mnr, 3); //double backgroundThreshold = 4.0; //SETS MIN DECIBEL BOUND //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold); //double ConfigRange = 70; //sets the the max dB //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange); ////3: Spectral tracks sonogram //byte[,] binary = MatrixTools.IdentifySpectralRidges(mnr); //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10); //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary); ////binary = MatrixTools.PickOutLines(binary); //syntactic approach //sonogram.SetBinarySpectrum(binary); ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_tracks.png"; //image.Save(fn); //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn); //3: prepare image of spectral peaks sonogram //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_peaks.png"; //image.Save(fn); //LoggedConsole.WriteLine("Spectral peaks sonogram to file: " + fn); //4: Sobel approach //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + 
"_sobel.png"; //image.Save(fn); //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn); // I1.txt contains the sonogram matrix produced by matlab //string matlabFile = @"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt"; //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166); //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]); //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]); // TODO put this back once sonogram issues resolved /* * LoggedConsole.WriteLine("START: AED"); * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix); * LoggedConsole.WriteLine("END: AED"); * * * //set up static variables for init Acoustic events * //AcousticEvent. doMelScale = config.DoMelScale; * AcousticEvent.FreqBinCount = config.FreqBinCount; * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount; * // int minF = (int)config.MinFreqBand; * // int maxF = (int)config.MaxFreqBand; * AcousticEvent.FrameDuration = config.GetFrameOffset(); * * * var events = new List<EventPatternRecog.Rectangle>(); * foreach (Oblong o in oblongs) * { * var e = new AcousticEvent(o); * events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq)); * //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq); * } * * LoggedConsole.WriteLine("# AED events: " + events.Count); * * LoggedConsole.WriteLine("START: EPR"); * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events); * LoggedConsole.WriteLine("END: EPR"); * * var eprEvents = new List<AcousticEvent>(); * foreach (EventPatternRecog.Rectangle r in eprRects) * { * var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false); * LoggedConsole.WriteLine(ae.WriteProperties()); * eprEvents.Add(ae); * } * * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png"); * * bool doHighlightSubband = false; bool add1kHzLines = true; * var image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); * image.AddEvents(eprEvents); * image.Save(outputFolder + wavFileName + ".png"); */ LoggedConsole.WriteLine("\nFINISHED!"); }
/// <summary> /// Currently this method is called by only one species recognizer - LitoriaCaerulea. /// </summary> /// <param name="ipArray">an array of decibel values.</param> /// <param name="framesPerSecond">the frame rate.</param> /// <param name="decibelThreshold">Ignore frames below this threshold.</param> /// <param name="dctDuration">Duration in seconds of the required DCT.</param> /// <param name="minOscFreq">minimum oscillation frequency.</param> /// <param name="maxOscFreq">maximum oscillation frequency.</param> /// <param name="dctThreshold">Threshold for the maximum DCT coefficient.</param> /// <param name="dctScores">an array of DCT scores.</param> /// <param name="oscFreq">an array of oscillation frequencies.</param> public static void DetectOscillations( double[] ipArray, double framesPerSecond, double decibelThreshold, double dctDuration, double minOscFreq, double maxOscFreq, double dctThreshold, out double[] dctScores, out double[] oscFreq) { int dctLength = (int)Math.Round(framesPerSecond * dctDuration); int minIndex = (int)(minOscFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi int maxIndex = (int)(maxOscFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi if (maxIndex > dctLength) { LoggedConsole.WriteWarnLine("MaxIndex > DCT length. Therefore set maxIndex = DCT length."); maxIndex = dctLength; } int length = ipArray.Length; dctScores = new double[length]; oscFreq = new double[length]; //set up the cosine coefficients double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //following two lines write bmp image of cosine matrix values for checking. //string bmpPath = @"C:\SensorNetworks\Output\cosines.png"; //ImageTools.DrawMatrix(cosines, bmpPath, true); for (int r = 1; r < length - dctLength; r++) { // skip unless the current location is a local peak if (ipArray[r] < ipArray[r - 1] || ipArray[r] < ipArray[r + 1]) { continue; } // skip if the peak is below the decibel threshold if (ipArray[r] < decibelThreshold) { continue; } // extract array and ready for DCT var dctArray = DataTools.Subarray(ipArray, r, dctLength); dctArray = DataTools.SubtractMean(dctArray); double[] dctCoefficient = MFCCStuff.DCT(dctArray, cosines); // convert to absolute values because not interested in negative values due to phase. for (int i = 0; i < dctLength; i++) { dctCoefficient[i] = Math.Abs(dctCoefficient[i]); } // remove low freq oscillations from consideration int thresholdIndex = minIndex / 4; for (int i = 0; i < thresholdIndex; i++) { dctCoefficient[i] = 0.0; } dctCoefficient = DataTools.normalise2UnitLength(dctCoefficient); int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficient); //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoefficient[indexOfMaxValue] > dctThreshold) { for (int i = 0; i < dctLength; i++) { if (dctScores[r + i] < dctCoefficient[indexOfMaxValue]) { dctScores[r + i] = dctCoefficient[indexOfMaxValue]; oscFreq[r + i] = indexOfMaxValue / dctDuration / 2; } } } } }
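// ------------------------------------------------------------------------------------------------
// Hedged usage sketch (assumption, not in the original source): shows how the per-frame outputs of
// DetectOscillations() above might be consumed. The parameter values below are illustrative only
// and are not taken from any recogniser config; the method name is hypothetical.
private static void DetectOscillationsUsageSketch(double[] decibelsPerFrame, double framesPerSecond)
{
    // A 0.5 s DCT searching for 10-50 oscillations per second maps to DCT indices 10-50.
    DetectOscillations(
        decibelsPerFrame,
        framesPerSecond,
        3.0,   // decibelThreshold: ignore frames quieter than this
        0.5,   // dctDuration in seconds
        10.0,  // minOscFreq
        50.0,  // maxOscFreq
        0.5,   // dctThreshold: minimum acceptable DCT coefficient
        out double[] dctScores,
        out double[] oscFreq);

    // dctScores[i] is the winning DCT coefficient covering frame i; oscFreq[i] is its rate in Hz.
    for (int i = 0; i < dctScores.Length; i++)
    {
        if (dctScores[i] > 0.5)
        {
            LoggedConsole.WriteLine($"frame {i}: oscillation rate = {oscFreq[i]:F1} Hz");
        }
    }
}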
public static void Execute(Arguments arguments) { const string Title = "# DETERMINING SIGNAL TO NOISE RATIO IN RECORDING"; string date = "# DATE AND TIME: " + DateTime.Now; Log.WriteLine(Title); Log.WriteLine(date); Log.Verbosity = 1; var input = arguments.Source; var sourceFileName = input.Name; var outputDir = arguments.Output; var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(input.FullName); var outputTxtPath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".txt").ToFileInfo(); Log.WriteIfVerbose("# Recording file: " + input.FullName); Log.WriteIfVerbose("# Config file: " + arguments.Config); Log.WriteIfVerbose("# Output folder =" + outputDir.FullName); FileTools.WriteTextFile(outputTxtPath.FullName, date + "\n# Recording file: " + input.FullName); //READ PARAMETER VALUES FROM INI FILE // load YAML configuration Config configuration = ConfigFile.Deserialize(arguments.Config); //ii: SET SONOGRAM CONFIGURATION SonogramConfig sonoConfig = new SonogramConfig(); //default values config sonoConfig.SourceFName = input.FullName; sonoConfig.WindowSize = configuration.GetIntOrNull(AnalysisKeys.KeyFrameSize) ?? 512; sonoConfig.WindowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.5; sonoConfig.WindowFunction = configuration[AnalysisKeys.KeyWindowFunction]; sonoConfig.NPointSmoothFFT = configuration.GetIntOrNull(AnalysisKeys.KeyNPointSmoothFft) ?? 256; sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType(configuration[AnalysisKeys.NoiseReductionType]); int minHz = configuration.GetIntOrNull("MIN_HZ") ?? 0; int maxHz = configuration.GetIntOrNull("MAX_HZ") ?? 11050; double segK1 = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K1") ?? 0; double segK2 = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K2") ?? 0; double latency = configuration.GetDoubleOrNull("K1_K2_LATENCY") ?? 0; double vocalGap = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0; double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0; //bool DRAW_SONOGRAMS = (bool?)configuration.DrawSonograms ?? 
true; //options to draw sonogram //double intensityThreshold = Acoustics.AED.Default.intensityThreshold; //if (dict.ContainsKey(key_AED_INTENSITY_THRESHOLD)) intensityThreshold = Double.Parse(dict[key_AED_INTENSITY_THRESHOLD]); //int smallAreaThreshold = Acoustics.AED.Default.smallAreaThreshold; //if( dict.ContainsKey(key_AED_SMALL_AREA_THRESHOLD)) smallAreaThreshold = Int32.Parse(dict[key_AED_SMALL_AREA_THRESHOLD]); // COnvert input recording into wav var convertParameters = new AudioUtilityRequest { TargetSampleRate = 17640 }; var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav")); if (File.Exists(fileToAnalyse.FullName)) { File.Delete(fileToAnalyse.FullName); } var convertedFileInfo = AudioFilePreparer.PrepareFile( input, fileToAnalyse, convertParameters, outputDir); // (A) ########################################################################################################################## AudioRecording recording = new AudioRecording(fileToAnalyse.FullName); int signalLength = recording.WavReader.Samples.Length; TimeSpan wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds); double frameDurationInSeconds = sonoConfig.WindowSize / (double)recording.SampleRate; TimeSpan frameDuration = TimeSpan.FromTicks((long)(frameDurationInSeconds * TimeSpan.TicksPerSecond)); int stepSize = (int)Math.Floor(sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap)); double stepDurationInSeconds = sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap) / recording.SampleRate; TimeSpan stepDuration = TimeSpan.FromTicks((long)(stepDurationInSeconds * TimeSpan.TicksPerSecond)); double framesPerSecond = 1 / stepDuration.TotalSeconds; int frameCount = signalLength / stepSize; // (B) ################################## EXTRACT ENVELOPE and SPECTROGRAM ################################## var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts( recording, sonoConfig.WindowSize, sonoConfig.WindowOverlap); //double[] avAbsolute = dspOutput.Average; //average absolute value over the minute recording // (C) ################################## GET SIGNAL WAVEFORM ################################## double[] signalEnvelope = dspOutput.Envelope; double avSignalEnvelope = signalEnvelope.Average(); // (D) ################################## GET Amplitude Spectrogram ################################## double[,] amplitudeSpectrogram = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram. 
// (E) ################################## Generate deciBel spectrogram from amplitude spectrogram double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1); double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra( dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon); LoggedConsole.WriteLine("# Finished calculating decibel spectrogram."); StringBuilder sb = new StringBuilder(); sb.AppendLine("\nSIGNAL PARAMETERS"); sb.AppendLine("Signal Duration =" + wavDuration); sb.AppendLine("Sample Rate =" + recording.SampleRate); sb.AppendLine("Min Signal Value =" + dspOutput.MinSignalValue); sb.AppendLine("Max Signal Value =" + dspOutput.MaxSignalValue); sb.AppendLine("Max Absolute Ampl =" + signalEnvelope.Max().ToString("F3") + " (See Note 1)"); sb.AppendLine("Epsilon Ampl (1 bit)=" + epsilon); sb.AppendLine("\nFRAME PARAMETERS"); sb.AppendLine("Window Size =" + sonoConfig.WindowSize); sb.AppendLine("Frame Count =" + frameCount); sb.AppendLine("Envelope length=" + signalEnvelope.Length); sb.AppendLine("Frame Duration =" + frameDuration.TotalMilliseconds.ToString("F3") + " ms"); sb.AppendLine("Frame overlap =" + sonoConfig.WindowOverlap); sb.AppendLine("Step Size =" + stepSize); sb.AppendLine("Step duration =" + stepDuration.TotalMilliseconds.ToString("F3") + " ms"); sb.AppendLine("Frames Per Sec =" + framesPerSecond.ToString("F1")); sb.AppendLine("\nFREQUENCY PARAMETERS"); sb.AppendLine("Nyquist Freq =" + dspOutput.NyquistFreq + " Hz"); sb.AppendLine("Freq Bin Width =" + dspOutput.FreqBinWidth.ToString("F2") + " Hz"); sb.AppendLine("Nyquist Bin =" + dspOutput.NyquistBin); sb.AppendLine("\nENERGY PARAMETERS"); double val = dspOutput.FrameEnergy.Min(); sb.AppendLine( "Minimum dB / frame =" + (10 * Math.Log10(val)).ToString("F2") + " (See Notes 2, 3 & 4)"); val = dspOutput.FrameEnergy.Max(); sb.AppendLine("Maximum dB / frame =" + (10 * Math.Log10(val)).ToString("F2")); sb.AppendLine("\ndB NOISE SUBTRACTION"); double noiseRange = 2.0; //sb.AppendLine("Noise (estimate of mode) =" + sonogram.SnrData.NoiseSubtracted.ToString("F3") + " dB (See Note 5)"); //double noiseSpan = sonogram.SnrData.NoiseRange; //sb.AppendLine("Noise range =" + noiseSpan.ToString("F2") + " to +" + (noiseSpan * -1).ToString("F2") + " dB (See Note 6)"); //sb.AppendLine("SNR (max frame-noise) =" + sonogram.SnrData.Snr.ToString("F2") + " dB (See Note 7)"); //sb.Append("\nSEGMENTATION PARAMETERS"); //sb.Append("Segment Thresholds K1: {0:f2}. 
K2: {1:f2} (See Note 8)", segK1, segK2); //sb.Append("# Event Count = " + predictedEvents.Count()); FileTools.Append2TextFile(outputTxtPath.FullName, sb.ToString()); FileTools.Append2TextFile(outputTxtPath.FullName, GetSNRNotes(noiseRange).ToString()); // (F) ################################## DRAW IMAGE 1: original spectrogram Log.WriteLine("# Start drawing noise reduced sonograms."); TimeSpan X_AxisInterval = TimeSpan.FromSeconds(1); //int Y_AxisInterval = (int)Math.Round(1000 / dspOutput.FreqBinWidth); int nyquist = recording.SampleRate / 2; int hzInterval = 1000; var image1 = DrawSonogram(deciBelSpectrogram, wavDuration, X_AxisInterval, stepDuration, nyquist, hzInterval); // (G) ################################## Calculate modal background noise spectrum in decibels //double SD_COUNT = -0.5; // number of SDs above the mean for noise removal //NoiseReductionType nrt = NoiseReductionType.MODAL; //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, SD_COUNT); //double upperPercentileBound = 0.2; // lowest percentile for noise removal //NoiseReductionType nrt = NoiseReductionType.LOWEST_PERCENTILE; //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound); // (H) ################################## Calculate BRIGGS noise removal from amplitude spectrum int percentileBound = 20; // low energy percentile for noise removal //double binaryThreshold = 0.6; //works for higher SNR recordings double binaryThreshold = 0.4; //works for lower SNR recordings //double binaryThreshold = 0.3; //works for lower SNR recordings double[,] m = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetMask( amplitudeSpectrogram, percentileBound, binaryThreshold); string title = "TITLE"; var image2 = NoiseRemoval_Briggs.DrawSonogram( m, wavDuration, X_AxisInterval, stepDuration, nyquist, hzInterval, title); //Image image2 = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetSonograms(amplitudeSpectrogram, upperPercentileBound, binaryThreshold, // wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval); // (I) ################################## Calculate MEDIAN noise removal from amplitude spectrum //double upperPercentileBound = 0.8; // lowest percentile for noise removal //NoiseReductionType nrt = NoiseReductionType.MEDIAN; //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound); //double[,] noiseReducedSpectrogram1 = tuple.Item1; // //double[] noiseProfile = tuple.Item2; // smoothed modal profile //SNR.NoiseProfile dBProfile = SNR.CalculateNoiseProfile(deciBelSpectrogram, SD_COUNT); // calculate noise value for each freq bin. //double[] noiseProfile = DataTools.filterMovingAverage(dBProfile.noiseThresholds, 7); // smooth modal profile //double[,] noiseReducedSpectrogram1 = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, dBProfile.noiseThresholds); //Image image2 = DrawSonogram(noiseReducedSpectrogram1, wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval); var combinedImage = ImageTools.CombineImagesVertically(image1, image2); string imagePath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".png"); combinedImage.Save(imagePath); Log.WriteLine("# Finished recording:- " + input.Name); }
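// ------------------------------------------------------------------------------------------------
// Illustrative sketch only (assumption, not in the original source): the frame/step arithmetic used
// in Execute() above, isolated as a helper. With the defaults used there (WindowSize = 512,
// WindowOverlap = 0.5) and a 17640 Hz recording: stepSize = 256 samples, stepDuration is about
// 14.5 ms and framesPerSecond is about 68.9. The method name is hypothetical.
private static (int stepSize, TimeSpan stepDuration, double framesPerSecond) GetFrameStepParameters(
    int windowSize, double windowOverlap, int sampleRate)
{
    // The step (hop) is the non-overlapping part of each analysis window.
    int stepSize = (int)Math.Floor(windowSize * (1 - windowOverlap));
    double stepSeconds = windowSize * (1 - windowOverlap) / sampleRate;
    var stepDuration = TimeSpan.FromTicks((long)(stepSeconds * TimeSpan.TicksPerSecond));
    double framesPerSecond = 1 / stepDuration.TotalSeconds;
    return (stepSize, stepDuration, framesPerSecond);
}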
//////public static IndexCalculateResult Analysis( public static SpectralIndexValuesForContentDescription Analysis( AudioRecording recording, TimeSpan segmentOffsetTimeSpan, int sampleRateOfOriginalAudioFile, bool returnSonogramInfo = false) { // returnSonogramInfo = true; // if debugging double epsilon = recording.Epsilon; int sampleRate = recording.WavReader.SampleRate; //var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds); var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds); // Get FRAME parameters for the calculation of Acoustic Indices int frameSize = ContentSignatures.FrameSize; int frameStep = frameSize; // that is, windowOverlap = zero double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond)); // INITIALISE a RESULTS STRUCTURE TO return // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc. var config = new IndexCalculateConfig(); // sets some default values int freqBinCount = frameSize / 2; var indexProperties = GetIndexProperties(); ////////var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, segmentOffsetTimeSpan, config); var spectralIndices = new SpectralIndexValuesForContentDescription(); ///////result.SummaryIndexValues = null; ///////SpectralIndexValues spectralIndices = result.SpectralIndexValues; // set up default spectrogram to return ///////result.Sg = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null; ///////result.Hits = null; ///////result.TrackScores = new List<Plot>(); // ################################## FINISHED SET-UP // ################################## NOW GET THE AMPLITUDE SPECTROGRAM // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns. var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep); var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram; // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ################################## // Get the oscillation spectral index OSC separately from signal because need a different frame size etc. var sampleLength = Oscillations2014.DefaultSampleLength; var frameLength = Oscillations2014.DefaultFrameLength; var sensitivity = Oscillations2014.DefaultSensitivityThreshold; var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity); // double length of the vector because want to work with 256 element vector for spectrogram purposes spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort); // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ################################## // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2. // original sample rate can be anything 11.0-44.1 kHz. 
int originalNyquist = sampleRateOfOriginalAudioFile / 2; // if up-sampling has been done if (dspOutput1.NyquistFreq > originalNyquist) { dspOutput1.NyquistFreq = originalNyquist; dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change } // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram); // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram); for (int i = 0; i < temporalEntropySpectrum.Length; i++) { temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i]; } spectralIndices.ENT = temporalEntropySpectrum; // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ################################## // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon); double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram); spectralIndices.BGN = spectralDecibelBgn; // ii: Calculate the noise reduced decibel spectrogram derived from the segment recording. // REUSE the var decibelSpectrogram but this time using dspOutput1. decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon); decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn); decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0); // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram); // ###################################################################################################################################################### // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0 // FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth // dB THRESHOLD for calculating spectral coverage double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb; // Calculate lower and upper boundary bin ids. // The boundary between the low and mid frequency bands is set to avoid low-frequency bins containing anthropogenic noise, which would bias index values away from biophony. int midFreqBound = config.MidFreqBound; int lowFreqBound = config.LowFreqBound; int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth); int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth); var spActivity = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound); //spectralIndices.CVR = spActivity.CoverSpectrum; spectralIndices.EVN = spActivity.EventSpectrum; ///////result.TrackScores = null; ///////return result; return(spectralIndices); } // end calculation of Six Spectral Indices
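// ------------------------------------------------------------------------------------------------
// Illustrative sketch only (assumption, not in the original source): the band-boundary arithmetic
// used above for the spectral cover/event calculation, isolated as a helper. For example, assuming
// a 512-sample frame at 22050 Hz (FreqBinWidth of about 43.07 Hz), a LowFreqBound of 1000 Hz maps
// to bin 24 and a MidFreqBound of 8000 Hz maps to bin 186. The method name is hypothetical.
private static (int lowerBinBound, int middleBinBound) GetBandBoundaryBins(
    int lowFreqBound, int midFreqBound, double freqBinWidth)
{
    // Ceiling is used so that the boundary bin lies at or above the nominated frequency.
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / freqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / freqBinWidth);
    return (lowerBinBound, middleBinBound);
}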
/* * * /// <summary> * /// Detects oscillations in a given freq bin. * /// there are several important parameters for tuning. * /// a) DCTLength: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity. * /// Do not want too short because too small a range of oscillations * /// b) DCTindex: Sets lower bound for oscillations of interest. Index refers to array of coeff returned by DCT. * /// Array has same length as the length of the DCT. Low freq oscillations occur more often by chance. Want to exclude them. * /// c) MinAmplitude: minimum acceptable value of a DCT coefficient if hit is to be accepted. * /// The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned. * /// </summary> * /// <param name="minBin">min freq bin of search band</param> * /// <param name="maxBin">max freq bin of search band</param> * /// <param name="dctLength">number of values</param> * /// <param name="DCTindex">Sets lower bound for oscillations of interest.</param> * /// <param name="minAmplitude">threshold - do not accept a DCT value if its amplitude is less than this threshold</param> * public static Double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz, * double dctDuration, int minOscilFreq, int maxOscilFreq, double minAmplitude) * { * int minBin = (int)(minHz / sonogram.FBinWidth); * int maxBin = (int)(maxHz / sonogram.FBinWidth); * * int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration); * int minIndex = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi * int maxIndex = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi * if (maxIndex > dctLength) maxIndex = dctLength; //safety check in case of future changes to code. * * int rows = sonogram.Data.GetLength(0); * int cols = sonogram.Data.GetLength(1); * Double[,] hits = new Double[rows, cols]; * Double[,] matrix = sonogram.Data; * //matrix = ImageTools.WienerFilter(sonogram.Data, 3);// DO NOT USE - SMUDGES EVERYTHING * * * double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients * //following two lines write matrix of cos values for checking. * //string fPath = @"C:\SensorNetworks\Sonograms\cosines.txt"; * //FileTools.WriteMatrix2File_Formatted(cosines, fPath, "F3"); * * //following two lines write bmp image of cos values for checking. 
* //string fPath = @"C:\SensorNetworks\Output\cosines.bmp"; * //ImageTools.DrawMatrix(cosines, fPath); * * * * // traverse columns - skip DC column * * * for (int c = minBin; c <= maxBin; c++) { * for (int r = 0; r < rows - dctLength; r++) * { * var array = new double[dctLength]; * //accumulate J columns of values * for (int i = 0; i < dctLength; i++) * for (int j = 0; j < 5; j++) array[i] += matrix[r + i, c + j]; * * array = DataTools.SubtractMean(array); * // DataTools.writeBarGraph(array); * * double[] dct = MFCCStuff.DCT(array, cosines); * for (int i = 0; i < dctLength; i++) dct[i] = Math.Abs(dct[i]);//convert to absolute values * dct[0] = 0.0; dct[1] = 0.0; dct[2] = 0.0; dct[3] = 0.0; dct[4] = 0.0;//remove low freq oscillations from consideration * dct = DataTools.normalise2UnitLength(dct); * //dct = DataTools.NormaliseMatrixValues(dct); //another option to NormaliseMatrixValues * int indexOfMaxValue = DataTools.GetMaxIndex(dct); * double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi * * //DataTools.MinMax(dct, out min, out max); * // DataTools.writeBarGraph(dct); * * //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude * if ((indexOfMaxValue >= minIndex) && (indexOfMaxValue <= maxIndex) && (dct[indexOfMaxValue] > minAmplitude)) * { * for (int i = 0; i < dctLength; i++) hits[r + i, c] = oscilFreq; * } * r += 5; //skip rows * } * c++; //do alternate columns * } * return hits; * } */ public static double[] DetectOscillationsInScoreArray(double[] scoreArray, double dctDuration, double timeScale, double dctThreshold, bool normaliseDCT, int minOscilFreq, int maxOscilFreq) { int dctLength = (int)Math.Round(timeScale * dctDuration); int minIndex = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi int maxIndex = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi if (maxIndex > dctLength) { maxIndex = dctLength; //safety check in case of future changes to code. } int length = scoreArray.Length; double[] hits = new double[length]; double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients //following two lines write matrix of cos values for checking. //string fPath = @"C:\SensorNetworks\Sonograms\cosines.txt"; //FileTools.WriteMatrix2File_Formatted(cosines, fPath, "F3"); //following two lines write bmp image of cos values for checking. 
//string fPath = @"C:\SensorNetworks\Output\cosines.bmp"; //ImageTools.DrawMatrix(cosines, fPath); for (int r = 0; r < length - dctLength; r++) { var array = new double[dctLength]; //transfer values for (int i = 0; i < dctLength; i++) { array[i] = scoreArray[r + i]; } array = DataTools.SubtractMean(array); // DataTools.writeBarGraph(array); double[] dct = MFCCStuff.DCT(array, cosines); for (int i = 0; i < dctLength; i++) { dct[i] = Math.Abs(dct[i]); //convert to absolute values } for (int i = 0; i < 5; i++) { dct[i] = 0.0; //remove low freq oscillations from consideration } if (normaliseDCT) { dct = DataTools.normalise2UnitLength(dct); } int indexOfMaxValue = DataTools.GetMaxIndex(dct); double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi // DataTools.writeBarGraph(dct); //LoggedConsole.WriteLine("oscilFreq = " + oscilFreq); //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold) { hits[r] = dct[indexOfMaxValue]; hits[r + 1] = dct[indexOfMaxValue]; // because skipping rows. //for (int i = 0; i < dctLength; i++) if (hits[r + i] < dct[indexOfMaxValue]) hits[r + i] = dct[indexOfMaxValue]; } r += 1; //skip rows } return(hits); }
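// ------------------------------------------------------------------------------------------------
// Illustrative sketch only (assumption, not in the original source): the index/rate conversion used
// by the DCT-based oscillation detectors above, isolated as two helpers. DCT index k spans k
// half-cycles over the DCT window (index = Pi, not 2Pi), so a window of dctDuration seconds gives
// an oscillation rate of k / (2 * dctDuration). E.g. with dctDuration = 0.5 s, index 15 corresponds
// to 15 Hz, and a search band of 10-20 Hz corresponds to indices 10-20. Method names are hypothetical.
private static double DctIndexToOscillationRate(int dctIndex, double dctDuration)
{
    return dctIndex / dctDuration * 0.5;
}

private static int OscillationRateToDctIndex(double oscillationRate, double dctDuration)
{
    return (int)(oscillationRate * dctDuration * 2);
}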