//################################################################################################################################## /// <summary> /// NOTE!!!! The decibel array has been normalised in 0 - 1. /// </summary> protected static Tuple <double[, ], double[]> MakeCepstrogram(SonogramConfig config, double[,] matrix, double[] decibels, int sampleRate) { double[,] m = matrix; int nyquist = sampleRate / 2; double epsilon = config.epsilon; bool includeDelta = config.mfccConfig.IncludeDelta; bool includeDoubleDelta = config.mfccConfig.IncludeDoubleDelta; //Log.WriteIfVerbose(" MakeCepstrogram(matrix, decibels, includeDelta=" + includeDelta + ", includeDoubleDelta=" + includeDoubleDelta + ")"); //(i) APPLY FILTER BANK int bandCount = config.mfccConfig.FilterbankCount; bool doMelScale = config.mfccConfig.DoMelScale; int ccCount = config.mfccConfig.CcCount; int fftBinCount = config.FreqBinCount; //number of Hz bands = 2^N +1. Subtract DC bin int minHz = config.MinFreqBand ?? 0; int maxHz = config.MaxFreqBand ?? nyquist; Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1)); //error check that filterBankCount < FFTbins if (bandCount > fftBinCount) { throw new Exception( "## FATAL ERROR in BaseSonogram.MakeCepstrogram():- Can't calculate cepstral coeff. FilterbankCount > FFTbins. (" + bandCount + " > " + fftBinCount + ")\n\n"); } //this is the filter count for full bandwidth 0-Nyquist. This number is trimmed proportionately to fit the required bandwidth. if (doMelScale) { m = MFCCStuff.MelFilterBank(m, bandCount, nyquist, minHz, maxHz); // using the Greg integral } else { m = MFCCStuff.LinearFilterBank(m, bandCount, nyquist, minHz, maxHz); } Log.WriteIfVerbose("\tDim after filter bank=" + m.GetLength(1) + " (Max filter bank=" + bandCount + ")"); //(ii) CONVERT AMPLITUDES TO DECIBELS m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon); //from spectrogram //(iii) NOISE REDUCTION var tuple1 = SNR.NoiseReduce(m, config.NoiseReductionType, config.NoiseReductionParameter); m = tuple1.Item1; //(iv) calculate cepstral coefficients m = MFCCStuff.Cepstra(m, ccCount); //(v) NormaliseMatrixValues m = DataTools.normalise(m); //(vi) Calculate the full range of MFCC coefficients ie including decibel and deltas, etc m = MFCCStuff.AcousticVectors(m, decibels, includeDelta, includeDoubleDelta); var tuple2 = Tuple.Create(m, tuple1.Item2); return(tuple2); // return matrix and full bandwidth modal noise profile }
private double[,] SobelEdgegram(double[,] matrix) { double[,] m = MFCCStuff.DecibelSpectra(matrix, this.Configuration.WindowPower, this.SampleRate, this.Configuration.epsilon); //from spectrogram //double[,] m = Speech.DecibelSpectra(matrix); //NOISE REDUCTION var output = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter); this.SnrData.ModalNoiseProfile = output.Item2; return(ImageTools.SobelEdgeDetection(output.Item1)); }
/// <summary> /// A FALSE-COLOUR VERSION OF DECIBEL SPECTROGRAM /// Taken and adapted from Spectrogram Image 5 in the method of CLASS Audio2InputForConvCNN.cs:. /// </summary> /// <param name="dbSpectrogramData">the sonogram data (NOT noise reduced). </param> public static Image <Rgb24> DrawStandardSpectrogramInFalseColour(double[,] dbSpectrogramData) { // Do NOISE REDUCTION double noiseReductionParameter = 2.0; var tuple = SNR.NoiseReduce(dbSpectrogramData, NoiseReductionType.Standard, noiseReductionParameter); double[,] nrSpectrogramData = tuple.Item1; // store data matrix double ridgeThreshold = 2.5; double[,] matrix = dbSpectrogramData; byte[,] hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold); // ################### RESEARCH QUESTION: // I tried different EXPERIMENTS IN NORMALISATION //double min; double max; //DataTools.MinMax(spectralSelection, out min, out max); //double range = max - min; // readjust min and max to create the effect of contrast stretching. It enhances the spectrogram a bit //double fractionalStretching = 0.2; //min = min + (range * fractionalStretching); //max = max - (range * fractionalStretching); //range = max - min; // ULTIMATELY THE BEST APPROACH APPEARED TO BE FIXED NORMALISATION BOUNDS double truncateMin = -95.0; double truncateMax = -30.0; double filterCoefficient = 0.75; double[,] dbSpectrogramNorm = SpectrogramTools.NormaliseSpectrogramMatrix(dbSpectrogramData, truncateMin, truncateMax, filterCoefficient); truncateMin = 0; truncateMax = 50; // nr = noise reduced double[,] nrSpectrogramNorm = SpectrogramTools.NormaliseSpectrogramMatrix(nrSpectrogramData, truncateMin, truncateMax, filterCoefficient); nrSpectrogramNorm = MatrixTools.BoundMatrix(nrSpectrogramNorm, 0.0, 0.9); nrSpectrogramNorm = MatrixTools.SquareRootOfValues(nrSpectrogramNorm); nrSpectrogramNorm = DataTools.normalise(nrSpectrogramNorm); // create image from normalised data var image = SpectrogramTools.CreateFalseColourDecibelSpectrogramForZooming(dbSpectrogramNorm, nrSpectrogramNorm, hits); return(image); }
/// <summary> /// This method produces four spectrograms using four different values of neighbour hood decibel threshold. /// It can be used for test purposes. /// </summary> /// <param name="deciBelSpectrogram">the noisy decibel spectrogram</param> /// <param name="xAxisInterval">x-axis tic interval</param> /// <param name="stepDuration">the x-axis times scale</param> /// <param name="nyquist">max freq value</param> /// <param name="hzInterval">y-axis frequency scale</param> /// <returns>Image containing four sepctrograms</returns> public static Image ModalNoiseRemovalAndGetSonograms( double[,] deciBelSpectrogram, TimeSpan xAxisInterval, TimeSpan stepDuration, int nyquist, int hzInterval) { // The number of SDs above the mean for noise removal. // Set sdCount = -0.5 becuase when sdCount >= zero, noies removal is a bit severe for environmental recordings. var sdCount = -0.5; var nrt = NoiseReductionType.Modal; var tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, sdCount); var noiseReducedSpectrogram1 = tuple.Item1; var title = "title1"; var image1 = DrawSonogram(noiseReducedSpectrogram1, xAxisInterval, stepDuration, nyquist, hzInterval, title); double dBThreshold = 0.0; // SPECTRAL dB THRESHOLD for smoothing background double[,] noiseReducedSpectrogram2 = SNR.RemoveNeighbourhoodBackgroundNoise(noiseReducedSpectrogram1, dBThreshold); title = "title2"; var image2 = DrawSonogram(noiseReducedSpectrogram2, xAxisInterval, stepDuration, nyquist, hzInterval, title); // SPECTRAL dB THRESHOLD for smoothing background dBThreshold = 3.0; noiseReducedSpectrogram2 = SNR.RemoveNeighbourhoodBackgroundNoise(noiseReducedSpectrogram1, dBThreshold); title = "title3"; var image3 = DrawSonogram(noiseReducedSpectrogram2, xAxisInterval, stepDuration, nyquist, hzInterval, title); // SPECTRAL dB THRESHOLD for smoothing background dBThreshold = 10.0; noiseReducedSpectrogram2 = SNR.RemoveNeighbourhoodBackgroundNoise(noiseReducedSpectrogram1, dBThreshold); title = "title4"; var image4 = DrawSonogram(noiseReducedSpectrogram2, xAxisInterval, stepDuration, nyquist, hzInterval, title); var array = new Image[4]; array[0] = image1; array[1] = image2; array[2] = image3; array[3] = image4; var combinedImage = ImageTools.CombineImagesVertically(array); return(combinedImage); }
/// <summary> /// Initializes a new instance of the <see cref="DecibelSpectrogram"/> class. /// </summary> public DecibelSpectrogram(AmplitudeSpectrogram amplitudeSpectrogram) { this.Configuration = amplitudeSpectrogram.Configuration; this.Attributes = amplitudeSpectrogram.Attributes; // (ii) CONVERT AMPLITUDES TO DECIBELS this.Data = MFCCStuff.DecibelSpectra(amplitudeSpectrogram.Data, this.Attributes.WindowPower, this.Attributes.SampleRate, this.Attributes.Epsilon); // (iii) NOISE REDUCTION var tuple = SNR.NoiseReduce(this.Data, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter); this.Data = tuple.Item1; // store data matrix if (this.SnrData != null) { this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile } }
}//end CONSTRUCTOR public override void Make(double[,] amplitudeM) { double[,] m = amplitudeM; // (i) IF REQUIRED CONVERT TO FULL BAND WIDTH MEL SCALE // Make sure you have Configuration.MelBinCount somewhere if (this.Configuration.DoMelScale) { m = MFCCStuff.MelFilterBank(m, this.Configuration.MelBinCount, this.NyquistFrequency, 0, this.NyquistFrequency); // using the Greg integral } // (ii) CONVERT AMPLITUDES TO DECIBELS m = MFCCStuff.DecibelSpectra(m, this.Configuration.WindowPower, this.SampleRate, this.Configuration.epsilon); // (iii) NOISE REDUCTION var tuple = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter); this.Data = tuple.Item1; // store data matrix if (this.SnrData != null) { this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile } }
public static void Execute(Arguments arguments) { if (arguments == null) { arguments = Dev(); } LoggedConsole.WriteLine("DATE AND TIME:" + DateTime.Now); LoggedConsole.WriteLine("Syntactic Pattern Recognition\n"); //StringBuilder sb = new StringBuilder("DATE AND TIME:" + DateTime.Now + "\n"); //sb.Append("SCAN ALL RECORDINGS IN A DIRECTORY USING HTK-RECOGNISER\n"); Log.Verbosity = 1; FileInfo recordingPath = arguments.Source; FileInfo iniPath = arguments.Config; DirectoryInfo outputDir = arguments.Output; string opFName = "SPR-output.txt"; string opPath = outputDir + opFName; Log.WriteIfVerbose("# Output folder =" + outputDir); // A: READ PARAMETER VALUES FROM INI FILE var config = new ConfigDictionary(iniPath); Dictionary <string, string> dict = config.GetTable(); Dictionary <string, string> .KeyCollection keys = dict.Keys; string callName = dict[key_CALL_NAME]; double frameOverlap = Convert.ToDouble(dict[key_FRAME_OVERLAP]); //SPT PARAMETERS double intensityThreshold = Convert.ToDouble(dict[key_SPT_INTENSITY_THRESHOLD]); int smallLengthThreshold = Convert.ToInt32(dict[key_SPT_SMALL_LENGTH_THRESHOLD]); //WHIPBIRD PARAMETERS int whistle_MinHz = int.Parse(dict[key_WHISTLE_MIN_HZ]); int whistle_MaxHz = int.Parse(dict[key_WHISTLE_MAX_HZ]); double optimumWhistleDuration = double.Parse(dict[key_WHISTLE_DURATION]); //optimum duration of whistle in seconds int whip_MinHz = (dict.ContainsKey(key_WHIP_MIN_HZ)) ? int.Parse(dict[key_WHIP_MIN_HZ]) : 0; int whip_MaxHz = (dict.ContainsKey(key_WHIP_MAX_HZ)) ? int.Parse(dict[key_WHIP_MAX_HZ]) : 0; double whipDuration = (dict.ContainsKey(key_WHIP_DURATION)) ? double.Parse(dict[key_WHIP_DURATION]) : 0.0; //duration of whip in seconds //CURLEW PARAMETERS double minDuration = (dict.ContainsKey(key_MIN_DURATION)) ? double.Parse(dict[key_MIN_DURATION]) : 0.0; //min duration of call in seconds double maxDuration = (dict.ContainsKey(key_MAX_DURATION)) ? double.Parse(dict[key_MAX_DURATION]) : 0.0; //duration of call in seconds double eventThreshold = double.Parse(dict[key_EVENT_THRESHOLD]); //min score for an acceptable event int DRAW_SONOGRAMS = Convert.ToInt16(dict[key_DRAW_SONOGRAMS]); // B: CHECK to see if conversion from .MP3 to .WAV is necessary var destinationAudioFile = recordingPath; //LOAD RECORDING AND MAKE SONOGRAM BaseSonogram sonogram = null; using (var recording = new AudioRecording(destinationAudioFile.FullName)) { // if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE var sonoConfig = new SonogramConfig { NoiseReductionType = NoiseReductionType.None, //NoiseReductionType = NoiseReductionType.STANDARD, WindowOverlap = frameOverlap, }; sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); } List <AcousticEvent> predictedEvents = null; double[,] hits = null; double[] scores = null; var audioFileName = Path.GetFileNameWithoutExtension(destinationAudioFile.FullName); if (callName.Equals("WHIPBIRD")) { //SPT var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold); //SPR Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold); int slope = 0; //degrees of the circle. i.e. 90 = vertical line. double sensitivity = 0.7; //lower value = more sensitive var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity); slope = 87; //84 sensitivity = 0.8; //lower value = more sensitive var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 4, intensityThreshold + 1, sensitivity); Log.WriteLine("SPR finished"); Log.WriteLine("Extract Whipbird calls - start"); int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth); int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth); int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec. int minBound_Whip = (int)(whip_MinHz / sonogram.FBinWidth); int maxBound_Whip = (int)(whip_MaxHz / sonogram.FBinWidth); int whipFrames = (int)(sonogram.FramesPerSecond * whipDuration); //86 = frames/sec. var result3 = DetectWhipBird(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, minBound_Whip, maxBound_Whip, whipFrames, smallLengthThreshold); scores = result3.Item1; hits = DataTools.AddMatrices(mHori, mVert); predictedEvents = AcousticEvent.ConvertScoreArray2Events( scores, whip_MinHz, whip_MaxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, eventThreshold, minDuration, maxDuration, TimeSpan.Zero); foreach (AcousticEvent ev in predictedEvents) { ev.FileName = audioFileName; ev.Name = callName; } sonogram.Data = result1.Item1; Log.WriteLine("Extract Whipbird calls - finished"); } else if (callName.Equals("CURLEW")) { //SPT double backgroundThreshold = 4.0; var result1 = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.Standard, backgroundThreshold); //var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold); //var result1 = doNoiseRemoval(sonogram, intensityThreshold, smallLengthThreshold); //SPR Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold); int slope = 20; //degrees of the circle. i.e. 90 = vertical line. double sensitivity = 0.8; //lower value = more sensitive var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity); slope = 160; sensitivity = 0.8; //lower value = more sensitive var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity); Log.WriteLine("SPR finished"); //detect curlew calls int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth); int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth); int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); var result3 = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, smallLengthThreshold); //process curlew scores - look for curlew characteristic periodicity double minPeriod = 1.2; double maxPeriod = 1.8; int minPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * minPeriod); int maxPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * maxPeriod); scores = DataTools.filterMovingAverage(result3.Item1, 21); scores = DataTools.PeriodicityDetection(scores, minPeriod_frames, maxPeriod_frames); //extract events predictedEvents = AcousticEvent.ConvertScoreArray2Events( scores, whistle_MinHz, whistle_MaxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, eventThreshold, minDuration, maxDuration, TimeSpan.Zero); foreach (AcousticEvent ev in predictedEvents) { ev.FileName = audioFileName; ev.Name = callName; } hits = DataTools.AddMatrices(mHori, mVert); sonogram.Data = result1.Item1; Log.WriteLine("Extract Curlew calls - finished"); } else if (callName.Equals("CURRAWONG")) { //SPT var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold); //SPR Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold); int slope = 70; //degrees of the circle. i.e. 90 = vertical line. //slope = 210; double sensitivity = 0.7; //lower value = more sensitive var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity); slope = 110; //slope = 340; sensitivity = 0.7; //lower value = more sensitive var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity); Log.WriteLine("SPR finished"); int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth); int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth); int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec. var result3 = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames + 10, smallLengthThreshold); scores = result3.Item1; hits = DataTools.AddMatrices(mHori, mVert); predictedEvents = AcousticEvent.ConvertIntensityArray2Events( scores, TimeSpan.Zero, whistle_MinHz, whistle_MaxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, eventThreshold, 0.5, maxDuration); foreach (AcousticEvent ev in predictedEvents) { ev.FileName = audioFileName; //ev.Name = callName; } } //write event count to results file. double sigDuration = sonogram.Duration.TotalSeconds; //string fname = Path.GetFileName(recordingPath); int count = predictedEvents.Count; Log.WriteIfVerbose("Number of Events: " + count); string str = string.Format("{0}\t{1}\t{2}", callName, sigDuration, count); FileTools.WriteTextFile(opPath, AcousticEvent.WriteEvents(predictedEvents, str).ToString()); // SAVE IMAGE string imageName = outputDir + audioFileName; string imagePath = imageName + ".png"; if (File.Exists(imagePath)) { int suffix = 1; while (File.Exists(imageName + "." + suffix.ToString() + ".png")) { suffix++; } //{ // suffix = (suffix == string.Empty) ? "1" : (int.Parse(suffix) + 1).ToString(); //} //File.Delete(outputDir + audioFileName + "." + suffix.ToString() + ".png"); File.Move(imagePath, imageName + "." + suffix.ToString() + ".png"); } //string newPath = imagePath + suffix + ".png"; if (DRAW_SONOGRAMS == 2) { DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold); } else if ((DRAW_SONOGRAMS == 1) && (predictedEvents.Count > 0)) { DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold); } Log.WriteIfVerbose("Image saved to: " + imagePath); //string savePath = outputDir + Path.GetFileNameWithoutExtension(recordingPath); //string suffix = string.Empty; //Image im = sonogram.GetImage(false, false); //string newPath = savePath + suffix + ".jpg"; //im.Save(newPath); LoggedConsole.WriteLine("\nFINISHED RECORDING!"); Console.ReadLine(); }