/// <summary>
/// AN EXPERIMENTAL SPECTROGRAM - a false-colour version of a standard scale spectrogram
/// with Sobel-detected ridges passed to the false-colour renderer.
/// </summary>
/// <param name="dbSpectrogramData">The original data for decibel spectrogram.</param>
/// <param name="nrSpectrogram">The noise-reduced spectrogram.</param>
/// <param name="sourceRecordingName">Name of the source file. Required only to add label to spectrogram.</param>
/// <returns>Image of spectrogram.</returns>
public static Image<Rgb24> GetDecibelSpectrogram_Ridges(
    double[,] dbSpectrogramData,
    SpectrogramStandard nrSpectrogram,
    string sourceRecordingName)
{
    // SOBEL ridge detection, run on a Wiener-filtered copy of the decibel data.
    const double sobelRidgeThreshold = 3.5;
    var smoothedMatrix = ImageTools.WienerFilter(dbSpectrogramData, 3);
    var ridgeHits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(smoothedMatrix, sobelRidgeThreshold);

    // NOTE: an alternative EIGEN (structure tensor) ridge detection was trialled here and is disabled.
    // It used ridgeThreshold = 6.0 and dominanceThreshold = 0.7 on the matrix rotated 90 degrees
    // anticlockwise, then rotated the hits back clockwise.

    var windowStep = nrSpectrogram.Configuration.WindowStep;
    var samplesPerSecond = nrSpectrogram.SampleRate;
    var title = $"AN EXPERIMENTAL DECIBEL SPECTROGRAM with ridges ({sourceRecordingName})";

    var falseColourImage = SpectrogramTools.CreateFalseColourDecibelSpectrogram(dbSpectrogramData, nrSpectrogram.Data, ridgeHits);
    falseColourImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
        falseColourImage,
        samplesPerSecond,
        windowStep,
        title,
        ImageTags[Experimental]);

    return falseColourImage;
}
/// <summary>
/// Frames a zoomed spectrogram: draws grid lines on <paramref name="bmp1"/> (in place) and then
/// stacks the title bar, the spectrogram and a freshly drawn time track on a black canvas.
/// </summary>
/// <param name="bmp1">The spectrogram image; grid lines are drawn directly onto it.</param>
/// <param name="titleBar">Image placed at the top of the composite.</param>
/// <param name="startOffset">Start offset of the spectrogram; also used as the origin of the time track.</param>
/// <param name="xAxisPixelDuration">Duration represented by one pixel column.</param>
/// <param name="xAxisTicInterval">Interval between time grid lines.</param>
/// <param name="nyquist">Nyquist frequency handed to the frequency scale.</param>
/// <param name="herzInterval">Hertz grid-line interval handed to the frequency scale.</param>
/// <returns>A new composite image; the caller owns it.</returns>
public static Image<Rgb24> FrameZoomSpectrogram(Image<Rgb24> bmp1, Image<Rgb24> titleBar, TimeSpan startOffset, TimeSpan xAxisPixelDuration, TimeSpan xAxisTicInterval, int nyquist, int herzInterval)
{
    TimeSpan fullDuration = TimeSpan.FromTicks(xAxisPixelDuration.Ticks * bmp1.Width);

    // init frequency scale
    int frameSize = bmp1.Height * 2; // THIS MIGHT BECOME A BUG ONE DAY!!!!!
    var freqScale = new FrequencyScale(nyquist, frameSize, herzInterval);
    SpectrogramTools.DrawGridLinesOnImage(bmp1, startOffset, fullDuration, xAxisTicInterval, freqScale);

    const int trackHeight = 20;

    // put start offset into a datetime object.
    DateTimeOffset dto = default(DateTimeOffset) + startOffset;

    // The time track is only needed while compositing, so dispose it afterwards
    // (previously it was left for the finalizer).
    using (Image<Rgb24> timeBmp = ImageTrack.DrawTimeTrack(fullDuration, dto, bmp1.Width, trackHeight))
    {
        // One extra row is allowed for in the height; it stays black.
        int imageHt = bmp1.Height + titleBar.Height + trackHeight + 1;
        var compositeBmp = new Image<Rgb24>(bmp1.Width, imageHt);

        compositeBmp.Mutate(gr =>
        {
            gr.Clear(Color.Black);
            int offset = 0;
            gr.DrawImage(titleBar, 0, offset); // title bar at the top
            offset += titleBar.Height;
            gr.DrawImage(bmp1, 0, offset); // the spectrogram
            offset += bmp1.Height;
            gr.DrawImage(timeBmp, 0, offset); // time track at the bottom
        });

        return compositeBmp;
    }
}
/// <summary>
/// Saves the sonogram-plus-charts image for the passed results as a ".profile.png" file in the output directory.
/// This method can be modified if you want to do something non-standard with the output spectrogram.
/// </summary>
/// <param name="results">Supplies the sonogram, events and plots to draw.</param>
/// <param name="genericConfig">Not used by this implementation.</param>
/// <param name="outputDirectory">Directory that receives the image.</param>
/// <param name="baseName">File name stem; ".profile.png" is appended.</param>
internal static void SaveDebugSpectrogram(RecognizerResults results, Config genericConfig, DirectoryInfo outputDirectory, string baseName)
{
    var debugImage = SpectrogramTools.GetSonogramPlusCharts(results.Sonogram, results.Events, results.Plots, null);
    string fileName = baseName + ".profile.png";
    string imagePath = Path.Combine(outputDirectory.FullName, fileName);
    debugImage.Save(imagePath);
}
/// <summary>
/// Writes two debug spectrogram images for this recogniser, but only when <c>MainEntry.InDEBUG</c> is set.
/// The first uses the passed sonogram and hits; the second is recomputed with a 1024-sample window and no overlap.
/// </summary>
private void WriteDebugImage(
    AudioRecording recording,
    DirectoryInfo outputDirectory,
    BaseSonogram sonogram,
    List<AcousticEvent> acousticEvents,
    List<Plot> plots,
    double[,] hits)
{
    // DEBUG IMAGE this recogniser only. MUST be false for deployment.
    if (!MainEntry.InDEBUG)
    {
        return;
    }

    string recordingStem = Path.GetFileNameWithoutExtension(recording.BaseName);

    // Image 1: the sonogram as analysed, with hits.
    Image firstImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, plots, hits);
    var firstPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(recordingStem, this.Identifier, "png", "DebugSpectrogram1"));
    firstImage.Save(firstPath.FullName);

    // Image 2: a new sonogram with a longer frame.
    // (NoiseReductionType.NONE was the earlier setting; Standard is used now.)
    var longFrameConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 1024,
        WindowOverlap = 0,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.1,
    };

    BaseSonogram longFrameSonogram = new SpectrogramStandard(longFrameConfig, recording.WavReader);
    var secondPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(recordingStem, this.Identifier, "png", "DebugSpectrogram2"));
    Image secondImage = SpectrogramTools.GetSonogramPlusCharts(longFrameSonogram, acousticEvents, plots, null);
    secondImage.Save(secondPath.FullName);
}
/// <summary>
/// Frames a day-of-year slice of a 3D spectrogram: stamps the date on <paramref name="bmp1"/>, draws grid
/// lines on it, and stacks title bar, 24-hour time scale, sun track, spectrogram and a second time scale.
/// </summary>
/// <param name="bmp1">The spectrogram slice; it is drawn on in place.</param>
/// <param name="titleBar">Image placed at the top of the composite.</param>
/// <param name="year">Calendar year of the slice.</param>
/// <param name="dayOfYear">One-based day of the year.</param>
/// <param name="xInterval">Interval between time grid lines.</param>
/// <param name="herzValue">Not used by this implementation (only by the disabled vertical frequency scale).</param>
/// <param name="sunriseSetData">File passed on to the sun-track renderer.</param>
/// <param name="nyquistFreq">Nyquist frequency handed to the frequency scale.</param>
/// <returns>The composite image.</returns>
public static Image<Rgb24> FrameSliceOf3DSpectrogram_DayOfYear(Image<Rgb24> bmp1, Image<Rgb24> titleBar, int year, int dayOfYear, TimeSpan xInterval, int herzValue, FileInfo sunriseSetData, int nyquistFreq)
{
    Image<Rgb24> suntrack = SunAndMoon.AddSunTrackToImage(bmp1.Width, sunriseSetData, year, dayOfYear);

    // Stamp the date in the top-left corner of the spectrogram.
    bmp1.Mutate(g =>
    {
        DateTime theDate = new DateTime(year, 1, 1).AddDays(dayOfYear - 1);
        string dateString = $"{year} {DataTools.MonthNames[theDate.Month - 1]} {theDate.Day:d2}";
        g.DrawText(dateString, Drawing.Arial12, Color.Wheat, new PointF(10, 3));
    });

    // One pixel column is taken to be 60 seconds.
    TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(60);
    var minuteOffset = TimeSpan.Zero;
    TimeSpan fullDuration = TimeSpan.FromSeconds(xAxisPixelDuration.TotalSeconds * bmp1.Width);

    // init frequency scale
    const int herzInterval = 1000;
    int frameSize = bmp1.Height;
    var freqScale = new DSP.FrequencyScale(nyquistFreq, frameSize, herzInterval);
    SpectrogramTools.DrawGridLinesOnImage(bmp1, minuteOffset, fullDuration, xInterval, freqScale);

    const int trackHeight = 20;
    var xAxisTicInterval = TimeSpan.FromMinutes(60); // assume 60 pixels per hour
    var timeScale24Hour = ImageTrack.DrawTimeTrack(fullDuration, minuteOffset, xAxisTicInterval, bmp1.Width, trackHeight, "hours");

    // The same time-scale image is placed both above and below the spectrogram.
    var imageList = new List<Image<Rgb24>> { titleBar, timeScale24Hour, suntrack, bmp1, timeScale24Hour };
    var compositeBmp = ImageTools.CombineImagesVertically(imageList);

    // The vertical 12-month scale and vertical frequency scale that used to flank the composite are
    // disabled, so the in-line combination currently receives the composite alone.
    compositeBmp = ImageTools.CombineImagesInLine(new Image<Rgb24>[] { compositeBmp });
    return compositeBmp;
}
/// <summary>
/// Normalises the second item of the spectral-peak histogram tuple so it can be shown in a score track.
/// According to the original note, this is only called from IndexCalculate when returning the
/// sonogram image for the passed recording segment.
/// </summary>
/// <param name="spectrogram">The spectrogram matrix handed to the peak histogram.</param>
/// <returns>The normalised array.</returns>
public static double[] ConvertSpectralPeaksToNormalisedArray(double[,] spectrogram)
{
    // Item2 reportedly has the length of the score array and stores the bin in which the
    // max peak is located - TODO confirm against HistogramOfSpectralPeaks.
    var peakBins = SpectrogramTools.HistogramOfSpectralPeaks(spectrogram).Item2;

    // Normalise this for display in score track.
    return DataTools.normalise(peakBins);
}
} //Analysis()

//private static System.Tuple<string[], Type[], bool[]> InitOutputTableColumns()
//{
//    HEADERS[0] = header_count; COL_TYPES[0] = typeof(int); DISPLAY_COLUMN[0] = false; COMBO_WEIGHTS[0] = 0.0;
//    HEADERS[1] = header_startMin; COL_TYPES[1] = typeof(double); DISPLAY_COLUMN[1] = false; COMBO_WEIGHTS[1] = 0.0;
//    HEADERS[2] = header_SecondsDuration; COL_TYPES[2] = typeof(double); DISPLAY_COLUMN[2] = false; COMBO_WEIGHTS[2] = 0.0;
//    HEADERS[3] = header_avAmpdB; COL_TYPES[3] = typeof(double); DISPLAY_COLUMN[3] = true; COMBO_WEIGHTS[3] = 0.0;
//    HEADERS[4] = header_snrdB; COL_TYPES[4] = typeof(double); DISPLAY_COLUMN[4] = true; COMBO_WEIGHTS[4] = 0.0;
//    HEADERS[5] = header_bgdB; COL_TYPES[5] = typeof(double); DISPLAY_COLUMN[5] = true; COMBO_WEIGHTS[5] = 0.0;
//    HEADERS[6] = header_activity; COL_TYPES[6] = typeof(double); DISPLAY_COLUMN[6] = true; COMBO_WEIGHTS[6] = 0.0;
//    HEADERS[7] = header_hfCover; COL_TYPES[7] = typeof(double); DISPLAY_COLUMN[7] = true; COMBO_WEIGHTS[7] = 0.0;
//    HEADERS[8] = header_mfCover; COL_TYPES[8] = typeof(double); DISPLAY_COLUMN[8] = true; COMBO_WEIGHTS[8] = 0.0;
//    HEADERS[9] = header_lfCover; COL_TYPES[9] = typeof(double); DISPLAY_COLUMN[9] = true; COMBO_WEIGHTS[9] = 0.0;
//    HEADERS[10] = header_HAmpl; COL_TYPES[10] = typeof(double); DISPLAY_COLUMN[10] = true; COMBO_WEIGHTS[10] = 0.0;
//    HEADERS[11] = header_HAvSpectrum; COL_TYPES[11] = typeof(double); DISPLAY_COLUMN[11] = true; COMBO_WEIGHTS[11] = 0.4;
//    //HEADERS[12] = header_HVarSpectrum; COL_TYPES[12] = typeof(double); DISPLAY_COLUMN[12] = false; COMBO_WEIGHTS[12] = 0.1;
//    return Tuple.Create(HEADERS, COL_TYPES, DISPLAY_COLUMN);
//}

/// <summary>
/// Renders the sonogram with the passed score plots, supplying an empty configuration dictionary,
/// no predicted events and an event threshold of zero.
/// </summary>
/// <param name="sonogram">The sonogram to draw.</param>
/// <param name="scores">Score plots passed through to the renderer.</param>
/// <returns>The rendered image.</returns>
static Image DrawSonogram(BaseSonogram sonogram, List<Plot> scores)
{
    var emptyConfig = new Dictionary<string, string>();
    List<AcousticEvent> noEvents = null;
    const double zeroThreshold = 0.0;

    return SpectrogramTools.Sonogram2Image(sonogram, emptyConfig, null, scores, noEvents, zeroThreshold);
}
/*
 * /// <summary>
 * /// Summarize your results. This method is invoked exactly once per original file.
 * /// </summary>
 * public override void SummariseResults(
 * AnalysisSettings settings,
 * FileSegment inputFileSegment,
 * EventBase[] events,
 * SummaryIndexBase[] indices,
 * SpectralIndexBase[] spectralIndices,
 * AnalysisResult2[] results)
 * {
 * // No operation - do nothing. Feel free to add your own logic.
 * base.SummariseResults(settings, inputFileSegment, events, indices, spectralIndices, results);
 * }
 */

/// <summary>
/// Saves the sonogram-plus-charts image (drawn from the results' new events and plots) as a
/// ".profile.png" file in the output directory and reports where it was written.
/// This method can be modified if you want to do something non-standard with the output spectrogram.
/// </summary>
/// <param name="results">Supplies the sonogram, new events and plots to draw.</param>
/// <param name="genericConfig">Not used by this implementation.</param>
/// <param name="outputDirectory">Directory that receives the image.</param>
/// <param name="baseName">File name stem; ".profile.png" is appended.</param>
/// <returns>The full path of the saved image.</returns>
public static string SaveDebugSpectrogram(RecognizerResults results, Config genericConfig, DirectoryInfo outputDirectory, string baseName)
{
    var profileImage = SpectrogramTools.GetSonogramPlusCharts(results.Sonogram, results.NewEvents, results.Plots, null);
    string fileName = baseName + ".profile.png";
    string savedPath = Path.Combine(outputDirectory.FullName, fileName);
    profileImage.Save(savedPath);
    return savedPath;
}
/// <summary>
/// Draws a decibel spectrogram annotated with three band-intensity plots and three events,
/// saves it for visual confirmation, and checks the resulting image dimensions.
/// </summary>
public void TestAnnotatedSonogramWithPlots()
{
    // Make a decibel spectrogram
    var spectrogram = new SpectrogramStandard(this.sonoConfig, this.recording.WavReader);

    // Normalisation bounds shared by the three plots.
    // The threshold of 0.25 corresponds to 12.5 dB above -100 dB.
    const double minDecibels = -100.0;
    const double maxDecibels = -50;
    const double normThreshold = 0.25;

    // One plot per 1 kHz band: 2-3, 3-4 and 4-5 kHz.
    int[] bandMinHz = { 2000, 3000, 4000 };
    int[] bandMaxHz = { 3000, 4000, 5000 };
    string[] plotTitles = { "Intensity 2-3 kHz", "Intensity 3-4 kHz", "Intensity 4-5 kHz" };

    var plots = new List<Plot>();
    for (int band = 0; band < plotTitles.Length; band++)
    {
        var bandDecibels = SNR.CalculateFreqBandAvIntensity(spectrogram.Data, bandMinHz[band], bandMaxHz[band], spectrogram.NyquistFrequency);
        var bandNormalised = DataTools.NormaliseInZeroOne(bandDecibels, minDecibels, maxDecibels);
        plots.Add(new Plot(plotTitles[band], bandNormalised, normThreshold));
    }

    // create three events, one in each band
    var startOffset = TimeSpan.Zero;
    var events = new List<AcousticEvent>
    {
        new AcousticEvent(startOffset, 10.0, 10.0, 2000, 3000),
        new AcousticEvent(startOffset, 25.0, 10.0, 3000, 4000),
        new AcousticEvent(startOffset, 40.0, 10.0, 4000, 5000),
    };

    var image = SpectrogramTools.GetSonogramPlusCharts(spectrogram, events, plots, null);

    // save the image for visual confirmation
    string imagePath = Path.Combine(this.outputDirectory.FullName, this.recording.BaseName + ".png");
    image.Save(imagePath);

    Assert.AreEqual(1621, image.Width);
    Assert.AreEqual(647, image.Height);
}
/// <summary>
/// Frames a day-of-year slice of a 3D spectrogram: stamps the date on <paramref name="bmp1"/>, draws grid
/// lines on it, and stacks title bar, 24-hour time scale, sun track, spectrogram and a second time scale.
/// </summary>
/// <param name="bmp1">The spectrogram slice; it is drawn on in place and is cast to <see cref="Bitmap"/>.</param>
/// <param name="titleBar">Image placed at the top of the composite.</param>
/// <param name="year">Calendar year of the slice.</param>
/// <param name="dayOfYear">One-based day of the year.</param>
/// <param name="xInterval">Interval between time grid lines.</param>
/// <param name="herzValue">Not used by this implementation (only by the disabled vertical frequency scale).</param>
/// <param name="sunriseSetData">File passed on to the sun-track renderer.</param>
/// <param name="nyquistFreq">Nyquist frequency handed to the frequency scale.</param>
/// <returns>The composite image.</returns>
public static Image FrameSliceOf3DSpectrogram_DayOfYear(Image bmp1, Image titleBar, int year, int dayOfYear, TimeSpan xInterval, int herzValue, FileInfo sunriseSetData, int nyquistFreq)
{
    Bitmap suntrack = SunAndMoon.AddSunTrackToImage(bmp1.Width, sunriseSetData, year, dayOfYear);

    // Stamp the date in the top-left corner. Graphics and Font wrap GDI+ handles, so release
    // them as soon as the text is drawn instead of leaving them for the finalizer.
    using (Graphics g = Graphics.FromImage(bmp1))
    using (Font stringFont = new Font("Arial", 12))
    {
        DateTime theDate = new DateTime(year, 1, 1).AddDays(dayOfYear - 1);
        string dateString = $"{year} {DataTools.MonthNames[theDate.Month - 1]} {theDate.Day:d2}";
        g.DrawString(dateString, stringFont, Brushes.Wheat, new PointF(10, 3));
    }

    // One pixel column is taken to be 60 seconds.
    TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(60);
    var minuteOffset = TimeSpan.Zero;
    TimeSpan fullDuration = TimeSpan.FromSeconds(xAxisPixelDuration.TotalSeconds * bmp1.Width);

    // init frequency scale
    const int herzInterval = 1000;
    int frameSize = bmp1.Height;
    var freqScale = new DSP.FrequencyScale(nyquistFreq, frameSize, herzInterval);
    SpectrogramTools.DrawGridLinesOnImage((Bitmap)bmp1, minuteOffset, fullDuration, xInterval, freqScale);

    const int trackHeight = 20;
    var xAxisTicInterval = TimeSpan.FromMinutes(60); // assume 60 pixels per hour
    var timeScale24Hour = ImageTrack.DrawTimeTrack(fullDuration, minuteOffset, xAxisTicInterval, bmp1.Width, trackHeight, "hours");

    // The same time-scale image is placed both above and below the spectrogram.
    var imageList = new List<Image> { titleBar, timeScale24Hour, suntrack, bmp1, timeScale24Hour };
    Image compositeBmp = ImageTools.CombineImagesVertically(imageList.ToArray());

    // The vertical 12-month scale and vertical frequency scale that used to flank the composite are
    // disabled, so the in-line combination currently receives the composite alone.
    compositeBmp = ImageTools.CombineImagesInLine(new Image[] { compositeBmp });
    return compositeBmp;
}
/// <summary>
/// Draws the sonogram together with its events, score plots and hits.
/// Overridable so derived classes can render differently.
/// </summary>
/// <param name="sonogram">The sonogram to draw.</param>
/// <param name="hits">Hit matrix passed through to the renderer.</param>
/// <param name="scores">Score plots passed through to the renderer.</param>
/// <param name="predictedEvents">Events passed through to the renderer.</param>
/// <param name="eventThreshold">Not used by this base implementation.</param>
/// <returns>The rendered image.</returns>
protected virtual Image DrawSonogram(
    BaseSonogram sonogram,
    double[,] hits,
    List<Plot> scores,
    List<AcousticEvent> predictedEvents,
    double eventThreshold)
    => SpectrogramTools.GetSonogramPlusCharts(sonogram, predictedEvents, scores, hits);
/// <summary>
/// Builds a sonogram of the recording and extracts the time/frequency rectangle of one event twice:
/// once from the raw sonogram data and once after background noise has been removed.
/// </summary>
/// <param name="recording">The source recording.</param>
/// <param name="eventStart">Event start, passed to the extractor together with the frame step.</param>
/// <param name="eventEnd">Event end, in the same units as <paramref name="eventStart"/>.</param>
/// <param name="minHz">Lower frequency bound of the event.</param>
/// <param name="maxHz">Upper frequency bound of the event.</param>
/// <param name="frameOverlap">Window overlap assigned to the sonogram configuration.</param>
/// <param name="backgroundThreshold">Threshold handed to the neighbourhood background-noise removal.</param>
/// <param name="segmentStartOffset">Segment start offset handed to the acoustic event.</param>
/// <returns>
/// Tuple of: the (noise-removed) sonogram, the acoustic event, the raw event matrix,
/// the modal-noise sub-band, and the event matrix after noise removal.
/// </returns>
public static Tuple<BaseSonogram, AcousticEvent, double[,], double[], double[,]> Execute_Extraction(
    AudioRecording recording,
    double eventStart,
    double eventEnd,
    int minHz,
    int maxHz,
    double frameOverlap,
    double backgroundThreshold,
    TimeSpan segmentStartOffset)
{
    // ii: MAKE SONOGRAM - default configuration except for source name and overlap.
    var config = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowOverlap = frameOverlap,
    };

    // Declared as BaseSonogram so that Tuple.Create infers the declared return type.
    BaseSonogram sonogram = new SpectrogramStandard(config, recording.WavReader);

    Log.WriteLine(
        "Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f2}%, Offset={3:f1}ms, Frames/s={4:f1}",
        sonogram.Configuration.WindowSize,
        sonogram.FrameCount,
        sonogram.FrameDuration * 1000,
        sonogram.FrameStep * 1000,
        sonogram.FramesPerSecond,
        frameOverlap);

    int lowBin = (int)(minHz / sonogram.FBinWidth);
    int highBin = (int)(maxHz / sonogram.FBinWidth);
    int binCount = highBin - lowBin + 1;
    Log.WriteIfVerbose("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);

    // Modal noise profile, using zero noise standard deviations for the threshold, then smoothed (window 7).
    const double noiseSdCount = 0.0;
    NoiseProfile noiseProfile = NoiseProfile.CalculateModalNoiseProfile(sonogram.Data, noiseSdCount);
    double[] smoothedModalNoise = DataTools.filterMovingAverage(noiseProfile.NoiseMode, 7);

    // Modal noise values within the event's frequency band.
    double[] noiseSubband = SpectrogramTools.ExtractModalNoiseSubband(smoothedModalNoise, minHz, maxHz, false, sonogram.NyquistFrequency, sonogram.FBinWidth);

    // Raw data values of the event - must be taken BEFORE sonogram.Data is replaced below.
    double[,] rawTarget = SpectrogramTools.ExtractEvent(sonogram.Data, eventStart, eventEnd, sonogram.FrameStep, minHz, maxHz, false, sonogram.NyquistFrequency, sonogram.FBinWidth);

    // Acoustic event with the defined boundaries.
    double eventDuration = eventEnd - eventStart;
    var acousticEvent = new AcousticEvent(segmentStartOffset, eventStart, eventDuration, minHz, maxHz);
    acousticEvent.SetTimeAndFreqScales(sonogram.FramesPerSecond, sonogram.FBinWidth);

    // Truncate noise, replacing the sonogram's data matrix.
    sonogram.Data = SNR.TruncateBgNoiseFromSpectrogram(sonogram.Data, smoothedModalNoise);
    sonogram.Data = SNR.RemoveNeighbourhoodBackgroundNoise(sonogram.Data, backgroundThreshold);

    double[,] denoisedTarget = SpectrogramTools.ExtractEvent(sonogram.Data, eventStart, eventEnd, sonogram.FrameStep, minHz, maxHz, false, sonogram.NyquistFrequency, sonogram.FBinWidth);

    return Tuple.Create(sonogram, acousticEvent, rawTarget, noiseSubband, denoisedTarget);
}
/// <summary>
/// Calculates three SUMMARY INDICES - three different measures of spectral entropy.
/// Each of them is derived from the frames of the passed amplitude spectrogram.
/// 1. the entropy of the average spectrum.
/// 2. the entropy of the variance spectrum.
/// 3. the entropy of the Coeff of Variation spectrum.
/// Any entropy that evaluates to NaN is replaced by 1.0.
/// </summary>
/// <param name="amplitudeSpectrogram">matrix.</param>
/// <param name="lowerBinBound">lower bin bound to be included in calculation of summary index.</param>
/// <param name="reducedFreqBinCount">total bin count to be included in calculation of summary index.</param>
/// <returns>
/// Three doubles: entropy of the average spectrum, entropy of the variance spectrum,
/// and entropy of the coefficient-of-variation spectrum.
/// </returns>
public static Tuple<double, double, double> CalculateSpectralEntropies(double[,] amplitudeSpectrogram, int lowerBinBound, int reducedFreqBinCount)
{
    // iv: ENTROPY OF AVERAGE SPECTRUM - at this point the spectrogram is a noise reduced amplitude spectrogram
    // Entropy is a measure of ENERGY dispersal, therefore must square the amplitude.
    var tuple = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(amplitudeSpectrogram);
    double[] averageSpectrum = DataTools.Subarray(tuple.Item1, lowerBinBound, reducedFreqBinCount); // remove low band
    double entropyOfAvSpectrum = DataTools.EntropyNormalised(averageSpectrum);
    if (double.IsNaN(entropyOfAvSpectrum))
    {
        entropyOfAvSpectrum = 1.0;
    }

    // v: ENTROPY OF VARIANCE SPECTRUM - at this point the spectrogram is a noise reduced amplitude spectrogram
    double[] varianceSpectrum = DataTools.Subarray(tuple.Item2, lowerBinBound, reducedFreqBinCount); // remove low band
    double entropyOfVarianceSpectrum = DataTools.EntropyNormalised(varianceSpectrum);
    if (double.IsNaN(entropyOfVarianceSpectrum))
    {
        entropyOfVarianceSpectrum = 1.0;
    }

    // vi: ENTROPY OF COEFFICIENT OF VARIANCE SPECTRUM
    // Both sub-arrays already exclude the low band, so the ratio is taken bin by bin.
    int covLength = varianceSpectrum.Length;
    double[] coeffOfVarSpectrum = new double[covLength];
    for (int i = 0; i < covLength; i++)
    {
        // Guard against division by zero: bins with no average energy get a ratio of 1.0.
        coeffOfVarSpectrum[i] = averageSpectrum[i] > 0.0
            ? varianceSpectrum[i] / averageSpectrum[i]
            : 1.0;
    }

    double entropyOfCoeffOfVarSpectrum = DataTools.EntropyNormalised(coeffOfVarSpectrum);

    // FIX: this guard previously tested entropyOfVarianceSpectrum, which has already been
    // sanitised above, so a NaN coefficient-of-variation entropy was returned unchanged.
    if (double.IsNaN(entropyOfCoeffOfVarSpectrum))
    {
        entropyOfCoeffOfVarSpectrum = 1.0;
    }

    return Tuple.Create(entropyOfAvSpectrum, entropyOfVarianceSpectrum, entropyOfCoeffOfVarSpectrum);
} // CalculateSpectralEntropies()
} // CalculateSpectralEntropies()

/// <summary>
/// CALCULATES THE ENTROPY OF DISTRIBUTION of maximum SPECTRAL PEAKS.
/// Only spectral peaks between the lowerBinBound and the upperBinBound will be included in calculation.
/// A NaN entropy is reported as 1.0.
/// </summary>
/// <param name="amplitudeSpectrogram">The (noise reduced) amplitude spectrogram.</param>
/// <param name="lowerBinBound">First column of the band passed to the sub-matrix extraction.</param>
/// <param name="upperBinBound">Upper bound of the band; column upperBinBound - 1 is the last one passed.</param>
/// <returns>The normalised entropy of the peak histogram.</returns>
public static double CalculateEntropyOfSpectralPeaks(double[,] amplitudeSpectrogram, int lowerBinBound, int upperBinBound)
{
    // Restrict the spectrogram to the requested band, keeping every row.
    int lastRow = amplitudeSpectrogram.GetLength(0) - 1;
    int lastColumn = upperBinBound - 1;
    var bandSpectrogram = MatrixTools.Submatrix(amplitudeSpectrogram, 0, lowerBinBound, lastRow, lastColumn);

    var peakHistogram = SpectrogramTools.HistogramOfSpectralPeaks(bandSpectrogram).Item1;
    double peakEntropy = DataTools.EntropyNormalised(peakHistogram);

    return double.IsNaN(peakEntropy) ? 1.0 : peakEntropy;
} // CalculateEntropyOfSpectralPeaks()
/// <summary>
/// Checks the average of an array of decibel values against known results
/// for an all-positive and an all-negative array.
/// </summary>
public void TestAverageOfDecibelValues()
{
    double[] positiveDecibels = { 96.0, 100.0, 90.0, 97.0 };
    double[] negativeDecibels = { -96.0, -100.0, -90.0, -97.0 };

    var positiveAverage = SpectrogramTools.AverageAnArrayOfDecibelValues(positiveDecibels);
    Assert.AreEqual(96.98816759, positiveAverage, AllowedDelta);

    var negativeAverage = SpectrogramTools.AverageAnArrayOfDecibelValues(negativeDecibels);
    Assert.AreEqual(-94.11528038, negativeAverage, AllowedDelta);
}
/// <summary>
/// Frames a constant-frequency slice of a 3D spectrogram: draws sunrise/sunset lines, a frequency label
/// and grid lines on <paramref name="bmp1"/>, stacks it between two 24-hour time scales under the title bar,
/// and flanks the result with a vertical year scale and a vertical frequency scale.
/// </summary>
/// <param name="bmp1">The spectrogram slice; it is drawn on in place and is cast to <see cref="Bitmap"/>.</param>
/// <param name="titleBar">Image placed at the top of the composite.</param>
/// <param name="xInterval">Interval between time grid lines.</param>
/// <param name="herzValue">Frequency shown in the label and passed to the vertical frequency scale.</param>
/// <param name="sunriseSetData">File passed on to the sunrise/sunset line renderer.</param>
/// <param name="nyquistFreq">Nyquist frequency handed to the frequency scales.</param>
/// <returns>The composite image.</returns>
/// <exception cref="ArgumentNullException">The vertical combination of the images returned null.</exception>
public static Image FrameSliceOf3DSpectrogram_ConstantFreq(Image bmp1, Image titleBar, TimeSpan xInterval, int herzValue, FileInfo sunriseSetData, int nyquistFreq)
{
    SunAndMoon.AddSunRiseSetLinesToImage((Bitmap)bmp1, sunriseSetData, 0, 365, 1); // assume full year and 1px/day

    // Label the slice. Graphics and Font wrap GDI+ handles, so release them as soon as
    // the text is drawn instead of leaving them for the finalizer.
    using (var g = Graphics.FromImage(bmp1))
    using (var stringFont = new Font("Arial", 12))
    {
        var str = $"Freq = {herzValue} Hz";
        g.DrawString(str, stringFont, Brushes.Wheat, new PointF(10, 7));
    }

    // One pixel column is taken to be 60 seconds.
    var xAxisPixelDuration = TimeSpan.FromSeconds(60);
    var startOffset = TimeSpan.Zero;
    var fullDuration = TimeSpan.FromSeconds(xAxisPixelDuration.TotalSeconds * bmp1.Width);

    // init frequency scale
    const int herzInterval = 1000;
    int frameSize = bmp1.Height;
    var freqScale = new DSP.FrequencyScale(nyquistFreq, frameSize, herzInterval);
    SpectrogramTools.DrawGridLinesOnImage((Bitmap)bmp1, startOffset, fullDuration, xInterval, freqScale);

    int trackHeight = 20;
    var xAxisTicInterval = TimeSpan.FromMinutes(60); // assume 60 pixels per hour
    var timeScale24Hour = ImageTrack.DrawTimeTrack(fullDuration, startOffset, xAxisTicInterval, bmp1.Width, trackHeight, "hours");

    // The same time-scale image is placed both above and below the spectrogram.
    var imageList = new List<Image> { titleBar, timeScale24Hour, bmp1, timeScale24Hour };
    var compositeBmp = ImageTools.CombineImagesVertically(imageList.ToArray());
    if (compositeBmp == null)
    {
        // Exception type kept as-is for callers that may already catch it.
        throw new ArgumentNullException(nameof(compositeBmp));
    }

    // The side scales must span the full height of the stacked image.
    trackHeight = compositeBmp.Height;
    Bitmap timeScale12Months = ImageTrack.DrawYearScaleVertical(40, trackHeight);
    Bitmap freqScaleImage = DrawFreqScale_vertical(40, trackHeight, herzValue, nyquistFreq);

    imageList = new List<Image> { timeScale12Months, compositeBmp, freqScaleImage };
    compositeBmp = ImageTools.CombineImagesInLine(imageList.ToArray());
    return compositeBmp;
}
/// <summary>
/// Outputs an array of peak bin indices, one per frame, expressed relative to the original input matrix.
/// </summary>
/// <param name="matrix">The full spectrogram matrix.</param>
/// <param name="minFreqBin">Lower frequency bin of the band of interest.</param>
/// <param name="maxFreqBin">Upper frequency bin of the band of interest.</param>
/// <returns>The peak bin index for each entry of the peak array.</returns>
public static int[] GetPeakBinsIndex(double[,] matrix, int minFreqBin, int maxFreqBin)
{
    // Restrict the matrix to the band defined by the min and max frequency bins.
    double[,] bandMatrix = GetArbitraryFreqBandMatrix(matrix, minFreqBin, maxFreqBin);

    // Peak bin of each spectrum, as an index into the band matrix.
    int[] peaks = SpectrogramTools.HistogramOfSpectralPeaks(bandMatrix).Item2;

    // Map band-relative indices back onto the original matrix (in place).
    // NOTE(review): the "- 1" presumably reflects one-based bin numbering in
    // GetArbitraryFreqBandMatrix - confirm against that helper.
    int frame = 0;
    while (frame < peaks.Length)
    {
        peaks[frame] = peaks[frame] + minFreqBin - 1;
        frame++;
    }

    return peaks;
}
/// <summary>
/// A FALSE-COLOUR VERSION OF DECIBEL SPECTROGRAM
/// Taken and adapted from Spectrogram Image 5 in the method of CLASS Audio2InputForConvCNN.cs.
/// </summary>
/// <param name="dbSpectrogramData">the sonogram data (NOT noise reduced). </param>
/// <returns>The false-colour image.</returns>
public static Image<Rgb24> DrawStandardSpectrogramInFalseColour(double[,] dbSpectrogramData)
{
    // NOISE REDUCTION (standard type, parameter 2.0); Item1 of the result is the data matrix.
    const double noiseReductionParameter = 2.0;
    double[,] noiseReduced = SNR.NoiseReduce(dbSpectrogramData, NoiseReductionType.Standard, noiseReductionParameter).Item1;

    // Ridge detection runs on the original (not noise-reduced) decibel data.
    const double ridgeThreshold = 2.5;
    byte[,] ridgeHits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(dbSpectrogramData, ridgeThreshold);

    // RESEARCH NOTE: various normalisation experiments were tried, including min/max contrast
    // stretching with a fractional stretch of 0.2. ULTIMATELY THE BEST APPROACH APPEARED TO BE
    // FIXED NORMALISATION BOUNDS, as used below.
    const double filterCoefficient = 0.75;

    // Decibel spectrogram: bounds -95 .. -30.
    double[,] dbNormalised = SpectrogramTools.NormaliseSpectrogramMatrix(dbSpectrogramData, -95.0, -30.0, filterCoefficient);

    // Noise-reduced spectrogram: bounds 0 .. 50, then bound to [0, 0.9], square-root and re-normalise.
    double[,] nrNormalised = SpectrogramTools.NormaliseSpectrogramMatrix(noiseReduced, 0, 50, filterCoefficient);
    nrNormalised = MatrixTools.BoundMatrix(nrNormalised, 0.0, 0.9);
    nrNormalised = MatrixTools.SquareRootOfValues(nrNormalised);
    nrNormalised = DataTools.normalise(nrNormalised);

    // create image from normalised data
    return SpectrogramTools.CreateFalseColourDecibelSpectrogramForZooming(dbNormalised, nrNormalised, ridgeHits);
}
/// <summary>
/// Can be used for visual checking and debugging purposes.
/// Draws one grey-scale image per index name (with grid lines, a time track and a header naming the index),
/// lays them out in a line and saves the result as baseName + "__Towsey.Acoustic.GreyScaleImages.png".
/// </summary>
/// <param name="dir">Output directory - it must already exist.</param>
/// <param name="baseName">File name stem of the saved image.</param>
/// <param name="dictionary">Index matrices; looked up by every name in <c>ContentSignatures.IndexNames</c>.</param>
public static void DrawNormalisedIndexMatrices(DirectoryInfo dir, string baseName, Dictionary<string, double[,]> dictionary)
{
    var list = new List<Image>();
    foreach (string key in ContentSignatures.IndexNames)
    {
        var bmp = ImageTools.DrawReversedMatrixWithoutNormalisation(dictionary[key]);

        // need to rotate spectrogram to get correct orientation.
        bmp.RotateFlip(RotateFlipType.Rotate270FlipNone);

        // draw grid lines and add axis scales (one pixel column is taken to be 60 seconds)
        var xAxisPixelDuration = TimeSpan.FromSeconds(60);
        var fullDuration = TimeSpan.FromTicks(xAxisPixelDuration.Ticks * bmp.Width);
        var freqScale = new FrequencyScale(11025, 512, 1000);
        SpectrogramTools.DrawGridLinesOnImage((Bitmap)bmp, TimeSpan.Zero, fullDuration, xAxisPixelDuration, freqScale);

        const int trackHeight = 20;
        var recordingStartDate = default(DateTimeOffset);
        var timeBmp = ImageTrack.DrawTimeTrack(fullDuration, recordingStartDate, bmp.Width, trackHeight);
        var image = ImageTools.CombineImagesVertically(new Image[] { bmp, timeBmp });

        // add a header to the spectrogram
        var header = new Bitmap(image.Width, 20);

        // Graphics and Font wrap GDI+ handles; dispose them each iteration rather than
        // leaking one pair per index.
        using (Graphics g = Graphics.FromImage(header))
        using (var headerFont = new Font("Tahoma", 9))
        {
            g.Clear(Color.LightGray);
            g.SmoothingMode = SmoothingMode.AntiAlias;
            g.InterpolationMode = InterpolationMode.HighQualityBicubic;
            g.PixelOffsetMode = PixelOffsetMode.HighQuality;
            g.DrawString(key, headerFont, Brushes.Black, 4, 4);
        }

        list.Add(ImageTools.CombineImagesVertically(new List<Image>(new[] { header, image })));
    }

    // save the image - the directory for the path must exist
    var path = Path.Combine(dir.FullName, baseName + "__Towsey.Acoustic.GreyScaleImages.png");
    var indexImage = ImageTools.CombineImagesInLine(list);
    indexImage?.Save(path);
}
/// <summary>
/// Asserts that every bin of the signal's average spectrum that exceeds 80% of the spectrum's maximum
/// lies within <paramref name="variance"/> bins of one of the expected frequencies.
/// </summary>
/// <param name="wavReader">Supplies the sample rate and epsilon for the FFT.</param>
/// <param name="signal">The signal to analyse.</param>
/// <param name="frequencies">Expected frequencies; each is converted to a bin via the FFT's bin width.</param>
/// <param name="variance">Allowed distance, in bins, either side of an expected bin.</param>
public static void AssertFrequencyInSignal(WavReader wavReader, double[] signal, int[] frequencies, int variance = 1)
{
    var fft = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(signal, wavReader.SampleRate, wavReader.Epsilon, 512, 0.0);
    var histogram = SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(fft.AmplitudeSpectrogram);

    double threshold = histogram.Max() * 0.8;
    var expectedBins = frequencies.Select(f => (int)(f / fft.FreqBinWidth)).ToArray();

    // Every above-threshold bin must be close to at least one expected bin.
    bool isOk = Enumerable.Range(0, histogram.Length)
        .Where(bin => histogram[bin] > threshold)
        .All(bin => expectedBins.Any(expected => bin >= expected - variance && bin <= expected + variance));

    BaseTest.Assert.IsTrue(isOk);
}
/// <summary>
/// Can be used for visual checking and debugging purposes.
/// Draws one grey-scale image per index name (with grid lines, a time track and a header naming the index),
/// lays them out in a line and saves the result as baseName + "__Towsey.Acoustic.GreyScaleImages.png".
/// </summary>
/// <param name="dir">Output directory - it must already exist.</param>
/// <param name="baseName">File name stem of the saved image.</param>
/// <param name="dictionary">Index matrices; looked up by every name in <c>ContentSignatures.IndexNames</c>.</param>
public static void DrawNormalisedIndexMatrices(DirectoryInfo dir, string baseName, Dictionary<string, double[,]> dictionary)
{
    var list = new List<Image<Rgb24>>();
    foreach (string key in ContentSignatures.IndexNames)
    {
        var bmp = ImageTools.DrawReversedMatrixWithoutNormalisation(dictionary[key]);

        // need to rotate spectrogram to get correct orientation.
        bmp.RotateFlip(RotateFlipType.Rotate270FlipNone);

        // draw grid lines and add axis scales (one pixel column is taken to be 60 seconds)
        var xAxisPixelDuration = TimeSpan.FromSeconds(60);
        var fullDuration = TimeSpan.FromTicks(xAxisPixelDuration.Ticks * bmp.Width);
        var freqScale = new FrequencyScale(11025, 512, 1000);
        SpectrogramTools.DrawGridLinesOnImage((Image<Rgb24>)bmp, TimeSpan.Zero, fullDuration, xAxisPixelDuration, freqScale);

        const int trackHeight = 20;
        var recordingStartDate = default(DateTimeOffset);
        var timeBmp = ImageTrack.DrawTimeTrack(fullDuration, recordingStartDate, bmp.Width, trackHeight);
        var image = ImageTools.CombineImagesVertically(bmp, timeBmp);

        // add a header to the spectrogram
        var header = Drawing.NewImage(image.Width, 20, Color.LightGray);
        header.Mutate(g =>
        {
            g.DrawText(key, Drawing.Tahoma9, Color.Black, new PointF(4, 4));
        });

        // Combine only after Mutate has returned, so the header is guaranteed to carry its text.
        // Previously this ran inside the Mutate callback, while the header was still being mutated.
        list.Add(ImageTools.CombineImagesVertically(header, image));
    }

    // save the image - the directory for the path must exist
    var path = Path.Combine(dir.FullName, baseName + "__Towsey.Acoustic.GreyScaleImages.png");
    var indexImage = ImageTools.CombineImagesInLine(list);
    indexImage?.Save(path);
}
/// <summary>
/// Calculates the six spectral indices used for content description (OSC, ACI, ENT, BGN, PMN and EVN)
/// from one recording segment.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="segmentOffsetTimeSpan">Not used by this method.</param>
/// <param name="sampleRateOfOriginalAudioFile">
/// Sample rate of the audio before any re-sampling. When the segment has been up-sampled, it is used to
/// lower the Nyquist frequency and Nyquist bin stored in the DSP output.
/// </param>
/// <param name="returnSonogramInfo">Not used by this method.</param>
/// <returns>The spectral index vectors. The cover spectrum (CVR) is not populated.</returns>
public static SpectralIndexValuesForContentDescription Analysis(
    AudioRecording recording,
    TimeSpan segmentOffsetTimeSpan,
    int sampleRateOfOriginalAudioFile,
    bool returnSonogramInfo = false)
{
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    // NOTE(review): value is not used below; the statement is retained so that evaluation is unchanged.
    var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds);

    // FRAME parameters for the calculation of acoustic indices.
    // The step equals the frame size, that is, there is no window overlap.
    int frameSize = ContentSignatures.FrameSize;
    int frameStep = frameSize;
    double secondsPerFrameStep = frameStep / (double)sampleRate;
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(secondsPerFrameStep * TimeSpan.TicksPerSecond));

    // default configuration; supplies the low and mid frequency bounds used further down
    var config = new IndexCalculateConfig();

    // NOTE(review): result is not used below; the call is retained in case it has side effects.
    var indexProperties = GetIndexProperties();

    var spectralIndices = new SpectralIndexValuesForContentDescription();

    // ---------- (A) AMPLITUDE SPECTROGRAM ----------
    // Extract envelope and spectrogram from the recording segment.
    // Note that the amplitude spectrogram has had the DC bin removed, i.e. it has only 256 columns.
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
    var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

    // ---------- (B) OSC INDEX, EXTRACTED DIRECTLY FROM THE RECORDING ----------
    // The oscillation index is calculated separately because it needs a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

    // double the length of the vector because a 256 element vector is wanted for spectrogram purposes
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // ---------- (C) INDICES FROM THE AMPLITUDE SPECTROGRAM ----------
    // If there has been up-sampling, the Nyquist of the original audio is lower than SR/2.
    // The original sample rate can be anything from 11.0 to 44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        // note that the bin width does not change
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth);
    }

    // Acoustic complexity index
    spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

    // Temporal entropy spectrum H(t), stored reversed as 1 - H(t) to express energy concentration
    double[] energyConcentration = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int bin = 0; bin < energyConcentration.Length; bin++)
    {
        energyConcentration[bin] = 1 - energyConcentration[bin];
    }

    spectralIndices.ENT = energyConcentration;

    // ---------- (D) INDICES FROM THE DECIBEL SPECTROGRAM ----------
    // i: convert the amplitude spectrogram to decibels and calculate the dB background noise profile
    double[,] rawDecibels = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] backgroundNoise = NoiseProfile.CalculateBackgroundNoise(rawDecibels);
    spectralIndices.BGN = backgroundNoise;

    // ii: noise-reduced decibel spectrogram.
    // NOTE(review): this repeats the conversion above with identical arguments. It is kept because it is
    // not visible from here whether CalculateBackgroundNoise modifies the matrix it is given.
    double[,] noiseReduced = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    noiseReduced = SNR.TruncateBgNoiseFromSpectrogram(noiseReduced, backgroundNoise);
    noiseReduced = SNR.RemoveNeighbourhoodBackgroundNoise(noiseReduced, nhThreshold: 2.0);

    // iii: average decibel spectrum of the noise-reduced spectrogram
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(noiseReduced);

    // iv: spectral events, calculated from the noise-reduced spectrogram with a dB activity threshold.
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

    // Lower and upper boundary bin ids. The boundary between the low and mid frequency bands exists to
    // avoid low frequency bins containing anthropogenic noise, which bias index values away from bio-phony.
    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
    var spActivity = ActivityAndCover.CalculateSpectralEvents(noiseReduced, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

    // only the event spectrum is stored; spActivity.CoverSpectrum is not copied into the result
    spectralIndices.EVN = spActivity.EventSpectrum;

    return spectralIndices;
} // end calculation of Six Spectral Indices
} //Analysis()

/// <summary>
/// Returns some indices relevant to rain and cicadas from a short (10 seconds) chunk of audio.
/// </summary>
/// <param name="signal">Signal envelope of a 10s chunk of audio.</param>
/// <param name="spectrogram">Spectrogram of a 10s chunk of audio. It is treated as an amplitude spectrogram.</param>
/// <param name="lowFreqBound">Boundary between the low and mid frequency bands. It is divided by <paramref name="binWidth"/> to locate the first mid-band bin.</param>
/// <param name="midFreqBound">Passed through to the spectral cover calculation.</param>
/// <param name="frameDuration">Passed through to the spectral cover calculation.</param>
/// <param name="binWidth">Width of one frequency bin, in the same units as the frequency bounds.</param>
/// <returns>A <c>RainStruct</c> holding the activity, noise, SNR, entropy, spike, ACI and band-cover values.</returns>
public static RainStruct Get10SecondIndices(double[] signal, double[,] spectrogram, int lowFreqBound, int midFreqBound, TimeSpan frameDuration, double binWidth)
{
    // i: FRAME ENERGIES - noise is subtracted using Lamel et al.
    const double standardDeviationCount = 0.1;
    var noiseReduction = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signal), standardDeviationCount);
    var decibels = SNR.TruncateNegativeValues2Zero(noiseReduction.NoiseReducedSignal);

    // flag the frames whose activity is >= threshold dB above background, then count them
    var isActive = new bool[decibels.Length];
    for (int frame = 0; frame < decibels.Length; frame++)
    {
        isActive[frame] = decibels[frame] >= ActivityAndCover.DefaultActivityThresholdDb;
    }

    int activeFrameCount = DataTools.CountTrues(isActive);

    const double spikeThreshold = 0.05;
    double spikeIndex = CalculateSpikeIndex(signal, spikeThreshold);

    double activity = activeFrameCount / (double)decibels.Length; // fraction of frames having acoustic activity
    double bgNoise = noiseReduction.NoiseMode; // bg noise in dB
    double snr = noiseReduction.Snr;
    double averageSignalDb = 20 * Math.Log10(signal.Average()); // 10 times log of amplitude squared
    double temporalEntropy = DataTools.EntropyNormalised(DataTools.SquareValues(signal)); // entropy of the energy envelope

    // ii: calculate the bin id of the boundary between the low and mid frequency spectrum,
    //     then keep only the columns from that bin upwards
    int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);
    int lastRow = spectrogram.GetLength(0) - 1;
    int lastColumn = spectrogram.GetLength(1) - 1;
    var midbandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, lastRow, lastColumn);

    // iii: ENTROPY OF AVERAGE SPECTRUM - at this point the spectrogram is still an amplitude spectrogram
    var averageAndVariance = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(midbandSpectrogram);
    double spectralEntropy = DataTools.EntropyNormalised(averageAndVariance.Item1);
    if (double.IsNaN(spectralEntropy))
    {
        spectralEntropy = 1.0;
    }

    // iv: Acoustic Complexity Index on the AMPLITUDE SPECTRUM, averaged over the mid-band bins
    double averageAci = AcousticComplexityIndex.CalculateAci(midbandSpectrogram).Average();

    // v: SPECTRAL COVER.
    // NOTE(review): no noise removal is applied to the spectrogram inside this method, so the
    // threshold below is applied to the spectrogram exactly as it was passed in.
    const double spectralBgThreshold = 0.015; // spectral amplitude threshold
    SpectralActivity cover = ActivityAndCover.CalculateSpectralEvents(spectrogram, spectralBgThreshold, frameDuration, lowFreqBound, midFreqBound, binWidth);

    return new RainStruct
    {
        activity = activity,
        bgNoise = bgNoise,
        snr = snr,
        avSig_dB = averageSignalDb,
        temporalEntropy = temporalEntropy,
        spikes = spikeIndex,
        spectralEntropy = spectralEntropy,
        ACI = averageAci,
        lowFreqCover = cover.LowFreqBandCover,
        midFreqCover = cover.MidFreqBandCover,
        hiFreqCover = cover.HighFreqBandCover,
    };
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Looks for oscillations in the average amplitude of the frequency band between the configured
/// minimum and maximum hertz values, using a DCT, and converts the resulting scores into acoustic events.
/// </summary>
/// <param name="recording">The audio segment to analyse. Its sample rate must be 22050 Hz.</param>
/// <param name="configuration">Recognizer settings (species names, frequency band, oscillation bounds and thresholds).</param>
/// <param name="segmentStartOffset">Start of this segment; passed on to the events that are created.</param>
/// <param name="getSpectralIndexes">Not used by this recognizer.</param>
/// <param name="outputDirectory">Directory that receives the debug image when debugging is enabled.</param>
/// <param name="imageWidth">Not used by this recognizer.</param>
/// <returns>The sonogram, the score plot and the detected events. <c>Hits</c> is always null.</returns>
/// <exception cref="InvalidOperationException">The recording's sample rate is not 22050 Hz.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    const int frameSize = 256;
    const double windowOverlap = 0.0;

    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // amplitude values below this threshold are skipped when searching for oscillations
    double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    // With zero window overlap each frame advances by one full window, so the frame rate
    // (sr / window size) is numerically equal to the frequency bin width.
    double framesPerSecond = freqBinWidth;
    double minPeriod = 1 / (double)maxOscilFreq;
    double maxPeriod = 1 / (double)minOscilFreq;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 5 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // remove baseline from amplitude array
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // remove hi freq content from amplitude array
    var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

    // ii: SCORE OSCILLATIONS
    var dctScores = new double[highPassFilteredSignal.Length];
    const int step = 2;
    for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
    {
        if (highPassFilteredSignal[i] < decibelThreshold)
        {
            continue;
        }

        double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

        // Look for oscillations in the highPassFilteredSignal
        Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out _, out var period, out var intensity);

        bool periodWithinBounds = period > minPeriod && period < maxPeriod;
        if (!periodWithinBounds)
        {
            continue;
        }

        if (intensity < dctThreshold)
        {
            continue;
        }

        // lay down score for sample length; keep the strongest intensity seen for each frame
        for (int j = 0; j < dctLength; j++)
        {
            if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
            {
                dctScores[i + j] = intensity;
            }
        }
    }

    // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, dctScores, eventThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
        DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);
        DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { ampltdPlot, lowPassPlot, demeanedPlot, plot };

        // the image is only needed until it has been written to disk, so release it afterwards
        using (Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null))
        {
            var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
            debugImage.Save(debugPath.FullName);
        }
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = null,
        Plots = plots,
        Events = acousticEvents,
    };
}
/// <summary>
/// Draws a stack of spectrogram images for one recording and saves it as a single PNG:
/// an amplitude spectrogram with local contrast normalisation, a false-colour amplitude spectrogram with ridges,
/// the wave envelope, a decibel spectrogram with its segmentation track, a noise-reduced decibel spectrogram
/// and a false-colour decibel spectrogram. Three unannotated BMP images are also written.
/// </summary>
/// <param name="sourceRecording">The audio file to draw.</param>
/// <param name="configDict">Spectrogram settings. Must contain the recording file name key.</param>
/// <param name="outputDirectory">Directory that receives all of the image files.</param>
/// <returns>A result whose <c>SpectrogramFile</c> points at the composite PNG.</returns>
public static AudioToSonogramResult GenerateSpectrogramImages(FileInfo sourceRecording, Dictionary<string, string> configDict, DirectoryInfo outputDirectory)
{
    // the source name was set up in the Analyse() method. But it could also be obtained directly from recording.
    string sourceName = Path.GetFileNameWithoutExtension(configDict[ConfigKeys.Recording.Key_RecordingFileName]);

    var result = new AudioToSonogramResult();

    // the image stack, listed top to bottom
    var imageStack = new List<Image>();

    // ---------- 1) amplitude spectrogram ----------
    var recordingSegment = new AudioRecording(sourceRecording.FullName);

    // default config values, except that noise removal is disabled for the first spectrograms
    var sonoConfig = new SonogramConfig(configDict) { NoiseReductionType = NoiseReductionType.None };
    BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);

    // remove the DC bin
    sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.FrameCount - 1, sonogram.Configuration.FreqBinCount);

    // keep a reference to the spectrogram data at this point - prior to noise reduction
    double[,] spectrogramDataBeforeNoiseReduction = sonogram.Data;

    const int lowPercentile = 20;
    const double neighbourhoodSeconds = 0.25;
    const double lcnContrastLevel = 0.25;
    int neighbourhoodFrames = (int)(sonogram.FramesPerSecond * neighbourhoodSeconds);
    sonogram.Data = NoiseRemoval_Briggs.NoiseReduction_ShortRecordings_SubtractAndLCN(sonogram.Data, lowPercentile, neighbourhoodFrames, lcnContrastLevel);

    // unannotated version
    var amplitudeBmp = new FileInfo(Path.Combine(outputDirectory.FullName, sourceName + ".amplitd.bmp"));
    ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), amplitudeBmp.FullName);

    // annotated version
    var image = sonogram.GetImageFullyAnnotated("AMPLITUDE SPECTROGRAM + Bin LCN (Local Contrast Normalisation)");
    imageStack.Add(image);

    // ---------- 2) false-colour version of the amplitude spectrogram ----------
    double ridgeThreshold = 0.20;
    double[,] matrix = ImageTools.WienerFilter(sonogram.Data, 3);
    byte[,] hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);
    hits = RidgeDetection.JoinDisconnectedRidgesInMatrix(hits, matrix, ridgeThreshold);
    image = SpectrogramTools.CreateFalseColourAmplitudeSpectrogram(spectrogramDataBeforeNoiseReduction, null, hits);
    image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "AMPLITUDE SPECTROGRAM + LCN + ridge detection");
    imageStack.Add(image);

    Image envelopeImage = ImageTrack.DrawWaveEnvelopeTrack(recordingSegment, image.Width);
    imageStack.Add(envelopeImage);

    // ---------- 3) standard decibel spectrogram ----------
    sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

    // remove the DC bin
    sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.FrameCount - 1, sonogram.Configuration.FreqBinCount);

    // unannotated version
    var decibelBmp = new FileInfo(Path.Combine(outputDirectory.FullName, sourceName + ".deciBel.bmp"));
    ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), decibelBmp.FullName);

    image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM");
    imageStack.Add(image);

    Image segmentationImage = ImageTrack.DrawSegmentationTrack(
        sonogram,
        EndpointDetectionConfiguration.K1Threshold,
        EndpointDetectionConfiguration.K2Threshold,
        image.Width);
    imageStack.Add(segmentationImage);

    // keep a copy of the sonogram data (NOT noise reduced) for later use
    var dbSpectrogramData = (double[,])sonogram.Data.Clone();

    // ---------- 4) noise reduced decibel spectrogram ----------
    sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
    sonoConfig.NoiseReductionParameter = 3;
    sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

    // unannotated version
    var noiseReducedBmp = new FileInfo(Path.Combine(outputDirectory.FullName, sourceName + ".noNoise_dB.bmp"));
    ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), noiseReducedBmp.FullName);
    image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM + Lamel noise subtraction");
    imageStack.Add(image);

    // keep a reference to the noise-reduced data for later use
    double[,] nrSpectrogramData = sonogram.Data;

    // ---------- 5) false-colour version of the decibel spectrogram ----------
    ridgeThreshold = 2.5;
    matrix = ImageTools.WienerFilter(dbSpectrogramData, 3);
    hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);
    image = SpectrogramTools.CreateFalseColourDecibelSpectrogram(dbSpectrogramData, nrSpectrogramData, hits);
    image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "DECIBEL SPECTROGRAM - Colour annotated");
    imageStack.Add(image);

    // ---------- 6) combine the spectrogram images and save ----------
    Image compositeImage = ImageTools.CombineImagesVertically(imageStack);
    var compositeFile = new FileInfo(Path.Combine(outputDirectory.FullName, sourceName + ".5spectro.png"));
    compositeImage.Save(compositeFile.FullName, ImageFormat.Png);
    result.SpectrogramFile = compositeFile;

    return result;
}
/// <summary>
/// Produces one of three outputs for a recording, depending on the flags:
/// a SoX spectrogram image, the decibel spectrogram data only, or a composite of several spectrogram images.
/// </summary>
/// <param name="sourceRecording">The audio file to process.</param>
/// <param name="path2SoxSpectrogram">Output path for the SoX image. Only used when <paramref name="makeSoxSonogram"/> is true.</param>
/// <param name="configDict">
/// Spectrogram settings. The noise background threshold entry is optional;
/// 2.0 is used when the key is absent or its value is null.
/// </param>
/// <param name="dataOnly">When true, only <c>DecibelSpectrogram</c> is set on the result (noise removal disabled) and no images are drawn.</param>
/// <param name="makeSoxSonogram">When true, the spectrogram is made with SoX and only <c>Path2SoxImage</c> is set on the result.</param>
/// <returns>The result object, populated according to the selected mode.</returns>
/// <exception cref="ArgumentException">Both <paramref name="dataOnly"/> and <paramref name="makeSoxSonogram"/> are true.</exception>
public static AudioToSonogramResult GenerateFourSpectrogramImages(
    FileInfo sourceRecording,
    FileInfo path2SoxSpectrogram,
    Dictionary<string, string> configDict,
    bool dataOnly = false,
    bool makeSoxSonogram = false)
{
    var result = new AudioToSonogramResult();

    if (dataOnly && makeSoxSonogram)
    {
        throw new ArgumentException("Can't produce data only for a SoX sonogram");
    }

    if (makeSoxSonogram)
    {
        SpectrogramTools.MakeSonogramWithSox(sourceRecording, configDict, path2SoxSpectrogram);
        result.Path2SoxImage = path2SoxSpectrogram;
    }
    else if (dataOnly)
    {
        var recordingSegment = new AudioRecording(sourceRecording.FullName);
        var sonoConfig = new SonogramConfig(configDict); // default values config

        // disable noise removal
        sonoConfig.NoiseReductionType = NoiseReductionType.None;
        Log.Warn("Noise removal disabled!");

        var sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);
        result.DecibelSpectrogram = sonogram;
    }
    else
    {
        // init the image stack
        var list = new List<Image>();

        // IMAGE 1) draw amplitude spectrogram
        var recordingSegment = new AudioRecording(sourceRecording.FullName);
        var sonoConfig = new SonogramConfig(configDict); // default values config

        // disable noise removal for the first two spectrograms, remembering the configured type so it can be restored
        var configuredNoiseReductionType = sonoConfig.NoiseReductionType;
        sonoConfig.NoiseReductionType = NoiseReductionType.None;

        BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);

        // remove the DC bin if it has not already been removed.
        // Assume test of divisible by 2 is good enough.
        int binCount = sonogram.Data.GetLength(1);
        if (!binCount.IsEven())
        {
            sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.FrameCount - 1, binCount - 1);
        }

        // save spectrogram data at this point - prior to noise reduction
        var spectrogramDataBeforeNoiseReduction = sonogram.Data;

        const double neighbourhoodSeconds = 0.25;
        int neighbourhoodFrames = (int)(sonogram.FramesPerSecond * neighbourhoodSeconds);
        const double lcnContrastLevel = 0.001;
        LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", sonogram.FramesPerSecond);
        LoggedConsole.WriteLine("LCN: Neighbourhood of {0} seconds = {1} frames", neighbourhoodSeconds, neighbourhoodFrames);
        const int lowPercentile = 20;
        sonogram.Data = NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(sonogram.Data, lowPercentile);
        sonogram.Data = NoiseRemoval_Briggs.NoiseReduction_byLCNDivision(sonogram.Data, neighbourhoodFrames, lcnContrastLevel);

        var image = sonogram.GetImageFullyAnnotated("AMPLITUDE SPECTROGRAM + Bin LCN (Local Contrast Normalisation)");
        list.Add(image);

        // false-colour amplitude spectrogram with ridges
        double[,] matrix = ImageTools.WienerFilter(sonogram.Data, 3);
        double ridgeThreshold = 0.25;
        byte[,] hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);
        hits = RidgeDetection.JoinDisconnectedRidgesInMatrix(hits, matrix, ridgeThreshold);
        image = SpectrogramTools.CreateFalseColourAmplitudeSpectrogram(spectrogramDataBeforeNoiseReduction, null, hits);
        image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "AMPLITUDE SPECTROGRAM + LCN + ridge detection");
        list.Add(image);

        Image envelopeImage = ImageTrack.DrawWaveEnvelopeTrack(recordingSegment, image.Width);
        list.Add(envelopeImage);

        // IMAGE 2) now draw the standard decibel spectrogram
        sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);
        result.DecibelSpectrogram = (SpectrogramStandard)sonogram;
        image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM");
        list.Add(image);

        Image segmentationImage = ImageTrack.DrawSegmentationTrack(
            sonogram,
            EndpointDetectionConfiguration.K1Threshold,
            EndpointDetectionConfiguration.K2Threshold,
            image.Width);
        list.Add(segmentationImage);

        // keep a copy of the sonogram data for later use
        double[,] dbSpectrogramData = (double[,])sonogram.Data.Clone();

        // 3) now draw the noise reduced decibel spectrogram
        // #NOISE REDUCTION PARAMETERS - restore noise reduction
        sonoConfig.NoiseReductionType = configuredNoiseReductionType;

        // The dictionary indexer throws when the key is missing, so the default can only be
        // applied by looking the key up with TryGetValue.
        string noiseBgThreshold;
        if (!configDict.TryGetValue(AnalysisKeys.NoiseBgThreshold, out noiseBgThreshold) || noiseBgThreshold == null)
        {
            noiseBgThreshold = "2.0";
        }

        sonoConfig.NoiseReductionParameter = double.Parse(noiseBgThreshold);

        sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);
        image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM + Lamel noise subtraction");
        list.Add(image);

        // keep a reference to the noise-reduced data for later use
        double[,] nrSpectrogramData = sonogram.Data;

        // 4) A FALSE-COLOUR VERSION OF SPECTROGRAM, using SOBEL ridge detection
        ridgeThreshold = 3.5;
        matrix = ImageTools.WienerFilter(dbSpectrogramData, 3);
        hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);

        image = SpectrogramTools.CreateFalseColourDecibelSpectrogram(dbSpectrogramData, nrSpectrogramData, hits);
        image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "DECIBEL SPECTROGRAM - Colour annotated");
        list.Add(image);

        // 5) TODO: ONE OF THESE YEARS FIX UP THE CEPTRAL SONOGRAM

        // 6) COMBINE THE SPECTROGRAM IMAGES
        result.CompositeImage = ImageTools.CombineImagesVertically(list);
    }

    return result;
}
//Analyze()

/// <summary>
/// ################ THE KEY ANALYSIS METHOD
/// Finds spectral tracks in a noise-reduced spectrogram of the segment, scores their periodicity,
/// converts the tracks to acoustic events and names each event using the frog data table.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment to analyse.</param>
/// <param name="analysisSettings">Supplies the configuration dictionary (frog data file, thresholds, durations and periodicity bounds).</param>
/// <param name="originalSampleRate">Sample rate of the original recording. Not used by this method.</param>
/// <param name="segmentStartOffset">Start of this segment; passed on when tracks are converted to events.</param>
/// <returns>
/// A tuple of: the sonogram, the hits matrix, the score plots, the frog events and the duration of the recording segment.
/// </returns>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, AnalysisSettings analysisSettings, int originalSampleRate, TimeSpan segmentStartOffset)
{
    Dictionary<string, string> configDict = analysisSettings.ConfigDict;

    // set default values - ignore those set by user
    int frameSize = 32;
    double windowOverlap = 0.3;
    int xCorrelationLength = 256; // for Xcorrelation - 256 frames @801 = 320ms, almost 1/3 second.
    double dBThreshold = 12.0;

    // tracks above this frequency (hertz) are ignored; also used as the top of the score plots
    const int maxFreq = 6000;

    // read the file containing parameters of frog calls to a table
    var dt = CsvTools.ReadCSVToTable(configDict[key_FROG_DATA], true);

    // NOTE(review): the five values below are parsed but not used further down. The statements are
    // retained because they fail fast when an entry is missing or malformed.
    double intensityThreshold = double.Parse(configDict[AnalysisKeys.IntensityThreshold]); // in 0-1
    double minDuration = double.Parse(configDict[AnalysisKeys.MinDuration]); // seconds
    double maxDuration = double.Parse(configDict[AnalysisKeys.MaxDuration]); // seconds
    double minPeriod = double.Parse(configDict[AnalysisKeys.MinPeriodicity]); // seconds
    double maxPeriod = double.Parse(configDict[AnalysisKeys.MaxPeriodicity]); // seconds

    // a constructor never yields null, so no null check is needed here
    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    // i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig(); // default values config
    sonoConfig.SourceFName = recording.BaseName;
    sonoConfig.WindowSize = frameSize;
    sonoConfig.WindowOverlap = windowOverlap;
    sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"); // must do noise removal
    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ii: GET TRACKS
    int nhLimit = 3; // limit of neighbourhood around maximum

    // NOTE(review): result is not used below; the call is retained in case it has side effects.
    var peaks = DataTools.GetPeakValues(sonogram.DecibelsPerFrame);

    var tuple = SpectralTrack.GetSpectralMaxima(sonogram.DecibelsPerFrame, sonogram.Data, dBThreshold, nhLimit);
    var maxFreqArray = tuple.Item1; // array (one element per frame) indicating which freq bin has max amplitude.
    var hitsMatrix = tuple.Item2;
    int herzOffset = 0;
    var tracks = SpectralTrack.GetSpectralTracks(maxFreqArray, framesPerSecond, freqBinWidth, herzOffset, SpectralTrack.MIN_TRACK_DURATION, SpectralTrack.MAX_INTRASYLLABLE_GAP, maxFreq);

    double severity = 0.5;
    double dynamicRange = 60; // deciBels above background noise. BG noise has already been removed from each bin.

    // convert sonogram to a list of frequency bin arrays
    var listOfFrequencyBins = SpectrogramTools.Sonogram2ListOfFreqBinArrays(sonogram, dynamicRange);
    int minFrameLength = SpectralTrack.FrameCountEquivalent(SpectralTrack.MIN_TRACK_DURATION, framesPerSecond);

    // Crop every track and drop those that become too short. Iterating backwards keeps the
    // remaining indices valid, and RemoveAt removes exactly the element that was inspected.
    for (int i = tracks.Count - 1; i >= 0; i--)
    {
        tracks[i].CropTrack(listOfFrequencyBins, severity);
        if (tracks[i].Length < minFrameLength)
        {
            tracks.RemoveAt(i);
        }
    }

    // find any periodicity in each track and calculate its score.
    foreach (SpectralTrack track in tracks)
    {
        SpectralTrack.DetectTrackPeriodicity(track, xCorrelationLength, listOfFrequencyBins, sonogram.FramesPerSecond);
    }

    int rowCount = sonogram.Data.GetLength(0);
    int topBin = (int)Math.Round(maxFreq / freqBinWidth);
    var plots = CreateScorePlots(tracks, rowCount, topBin);

    // iii: CONVERT TRACKS TO ACOUSTIC EVENTS
    List<AcousticEvent> frogEvents = SpectralTrack.ConvertTracks2Events(tracks, segmentStartOffset);

    // iv: GET FROG IDs
    foreach (AcousticEvent ae in frogEvents)
    {
        double oscRate = 1 / ae.Periodicity;
        string[] names = ClassifyFrogEvent(ae.DominantFreq, oscRate, dt);
        ae.Name = names[0];
        ae.Name2 = names[1];
    }

    return Tuple.Create(sonogram, hitsMatrix, plots, frogEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// Calculates the summary and spectral acoustic indices for one subsegment of a recording segment.
/// The subsegment is located by the difference between <paramref name="subsegmentOffsetTimeSpan"/> and
/// <paramref name="segmentStartOffset"/>; its length is config.IndexCalculationDurationTimeSpan.
/// </summary>
/// <param name="recording">The recording segment from which the subsegment (and, where required, a longer noise segment) is cut.</param>
/// <param name="subsegmentOffsetTimeSpan">Start of the subsegment. The local sample offset is this value minus <paramref name="segmentStartOffset"/>.</param>
/// <param name="indexProperties">Index properties, passed through to the <see cref="IndexCalculateResult"/> constructor.</param>
/// <param name="sampleRateOfOriginalAudioFile">Sample rate of the original audio file. Half this value is compared with the Nyquist of the DSP output to detect up-sampling.</param>
/// <param name="segmentStartOffset">Start offset of <paramref name="recording"/>, expressed on the same time base as <paramref name="subsegmentOffsetTimeSpan"/>.</param>
/// <param name="config">Supplies frame length, frequency bounds, band width fractions, frequency scale type, noise buffer and index calculation duration.</param>
/// <param name="returnSonogramInfo">When true, result.Sg is populated by calling GetSonogram with a window size of 1024; otherwise it is null.</param>
/// <returns>
/// The populated result. The method returns early, with only part of the result filled in, in two cases:
/// (1) the average signal envelope is below 0.0001, in which case ZeroSignal is set to 1.0;
/// (2) there is too little acoustic activity, or the index calculation duration is under 15 seconds,
/// in which case the cluster indices are set to zero and Hits/TrackScores keep their default values.
/// </returns>
public static IndexCalculateResult Analysis(
    AudioRecording recording,
    TimeSpan subsegmentOffsetTimeSpan,
    Dictionary<string, IndexProperties> indexProperties,
    int sampleRateOfOriginalAudioFile,
    TimeSpan segmentStartOffset,
    IndexCalculateConfig config,
    bool returnSonogramInfo = false)
{
    // returnSonogramInfo = true; // if debugging
    double epsilon = recording.Epsilon;
    int signalLength = recording.WavReader.GetChannel(0).Length;
    int sampleRate = recording.WavReader.SampleRate;
    var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    var indexCalculationDuration = config.IndexCalculationDurationTimeSpan;
    int nyquist = sampleRate / 2;

    // Get FRAME parameters for the calculation of Acoustic Indices
    // WARNING: DO NOT USE Frame Overlap when calculating acoustic indices.
    //          It yields ACI, BGN, POW and EVN results that are significantly different from the default.
    //          I have not had time to check if the difference is meaningful. Best to avoid.
    //int frameSize = (int?)config[AnalysisKeys.FrameLength] ?? IndexCalculateConfig.DefaultWindowSize;
    int frameSize = config.FrameLength;
    int frameStep = frameSize; // that is, windowOverlap = zero

    double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;

    int freqBinCount = frameSize / 2;

    // double freqBinWidth = recording.Nyquist / (double)freqBinCount;

    // get duration in seconds and sample count and frame count
    double subsegmentDurationInSeconds = indexCalculationDuration.TotalSeconds;
    int subsegmentSampleCount = (int)(subsegmentDurationInSeconds * sampleRate);
    double subsegmentFrameCount = subsegmentSampleCount / (double)frameStep;
    subsegmentFrameCount = (int)Math.Ceiling(subsegmentFrameCount);

    // In order not to lose the last fractional frame, round up the frame number
    // and get the exact number of samples in the integer number of frames.
    // Do this because when IndexCalculationDuration = 100ms, the number of frames is only 8.
    subsegmentSampleCount = (int)(subsegmentFrameCount * frameStep);

    // get start and end samples of the subsegment and noise segment
    double localOffsetInSeconds = subsegmentOffsetTimeSpan.TotalSeconds - segmentStartOffset.TotalSeconds;
    int startSample = (int)(localOffsetInSeconds * sampleRate);
    int endSample = startSample + subsegmentSampleCount - 1; // inclusive index of the last sample

    // Default behaviour: set SUBSEGMENT = total recording
    var subsegmentRecording = recording;

    // But if the indexCalculationDuration < segmentDuration, cut the subsegment out of the recording.
    if (indexCalculationDuration < segmentDuration)
    {
        // minimum samples needed to calculate acoustic indices. This value was chosen somewhat arbitrarily.
        // It allows for the case where IndexCalculationDuration = 100ms which is approx 8 frames
        int minimumViableSampleCount = frameSize * 8;
        int availableSignal = signalLength - startSample;

        // if (the required audio is beyond recording OR insufficient for analysis) then backtrack.
        if (availableSignal < minimumViableSampleCount)
        {
            // Back-track so we can fill a whole result.
            // This is a silent correction, equivalent to having a segment overlap for the last segment.
            var oldStart = startSample;
            startSample = signalLength - subsegmentSampleCount;

            // NOTE(review): endSample is an inclusive index where it is first calculated above
            // (start + count - 1) but is set to signalLength here - confirm whether signalLength - 1 was intended.
            endSample = signalLength;

            Logger.Trace(" Backtrack subsegment to fill missing data from imperfect audio cuts because not enough samples available. " + (oldStart - startSample) + " samples overlap.");
        }

        var subsamples = DataTools.Subarray(recording.WavReader.Samples, startSample, subsegmentSampleCount);
        var wr = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate);
        subsegmentRecording = new AudioRecording(wr);
    }

    // INITIALISE a RESULTS STRUCTURE TO return
    // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
    var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, subsegmentOffsetTimeSpan, config);
    SummaryIndexValues summaryIndices = result.SummaryIndexValues;
    SpectralIndexValues spectralIndices = result.SpectralIndexValues;

    // set up default spectrogram to return
    result.Sg = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null;
    result.Hits = null;
    result.TrackScores = new List<Plot>();

    // ################################## FINISHED SET-UP
    // ################################## NOW GET THE AMPLITUDE SPECTROGRAMS

    // EXTRACT ENVELOPE and SPECTROGRAM FROM SUBSEGMENT
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(subsegmentRecording, frameSize, frameStep);

    // Select band according to min and max bandwidth
    int minBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MinBandWidth);
    int maxBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MaxBandWidth) - 1;
    dspOutput1.AmplitudeSpectrogram = MatrixTools.Submatrix(
        dspOutput1.AmplitudeSpectrogram,
        0,
        minBand,
        dspOutput1.AmplitudeSpectrogram.GetLength(0) - 1,
        maxBand);

    // TODO: Michael to review whether bandwidth filter should be moved to DSP_Frames??
    // Recalculate NyquistBin and FreqBinWidth, because they change with band selection
    //dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1;
    //dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2;

    // Linear or Octave or Mel frequency scale? Set Linear as default.
    var freqScale = new FrequencyScale(nyquist: nyquist, frameSize: frameSize, hertzGridInterval: 1000);
    var freqScaleType = config.FrequencyScale;
    bool octaveScale = freqScaleType == FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
    bool melScale = freqScaleType == FreqScaleType.Mel;
    if (octaveScale)
    {
        // only allow one octave scale at the moment - for Jasco marine recordings.
        // ASSUME fixed Octave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS
        // If you wish to use other octave scale types then need to put in the config file and set up recovery here.
        freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);

        // Recalculate the spectrogram according to octave scale. This option works only when have high SR recordings.
        dspOutput1.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra(
            dspOutput1.AmplitudeSpectrogram,
            dspOutput1.WindowPower,
            sampleRate,
            epsilon,
            freqScale);
        dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!!
    }
    else if (melScale)
    {
        int minFreq = 0;
        int maxFreq = recording.Nyquist;
        dspOutput1.AmplitudeSpectrogram = MFCCStuff.MelFilterBank(
            dspOutput1.AmplitudeSpectrogram,
            config.MelScale,
            recording.Nyquist,
            minFreq,
            maxFreq);

        dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1;

        // TODO: This doesn't make any sense, since the frequency width changes for each bin. Probably need to set this to NaN.
        // TODO: Whatever uses this value below, should probably be changed to not be depending on it.
        dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2;
    }

    // NOW EXTRACT SIGNAL FOR BACKGROUND NOISE CALCULATION
    // A separate, longer noise segment is cut only when (index calculation duration + 2 * noise buffer)
    // is shorter than half the segment duration. Otherwise BGN is calculated from the subsegment itself.
    bool doSeparateBgnNoiseCalculation = indexCalculationDuration.TotalSeconds + (2 * config.BgNoiseBuffer.TotalSeconds) < segmentDuration.TotalSeconds / 2;
    var dspOutput2 = dspOutput1;

    if (doSeparateBgnNoiseCalculation)
    {
        // GET a longer SUBSEGMENT FOR NOISE calculation with a buffer on either side.
        // i.e. need to add noiseBuffer either side. Typical noiseBuffer value = 5 seconds
        int sampleBuffer = (int)(config.BgNoiseBuffer.TotalSeconds * sampleRate);
        var bgnRecording = AudioRecording.GetRecordingSubsegment(recording, startSample, endSample, sampleBuffer);

        // EXTRACT ENVELOPE and SPECTROGRAM FROM BACKGROUND NOISE SUBSEGMENT
        dspOutput2 = DSP_Frames.ExtractEnvelopeAndFfts(bgnRecording, frameSize, frameStep);

        // NOTE(review): the band selection (Submatrix) and the mel conversion applied to dspOutput1 above
        // are not repeated for dspOutput2; only the octave conversion is. Confirm that the noise profile
        // still has the same number of frequency bins as dspOutput1 in those configurations.

        // If necessary, recalculate the spectrogram according to octave scale. This option works only when have high SR recordings.
        if (octaveScale)
        {
            // ASSUME fixed Octave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS
            // If you wish to use other octave scale types then need to put in the config file and set up recovery here.
            dspOutput2.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra(
                dspOutput2.AmplitudeSpectrogram,
                dspOutput2.WindowPower,
                sampleRate,
                epsilon,
                freqScale);
            dspOutput2.NyquistBin = dspOutput2.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!!
        }
    }

    // ###################################### BEGIN CALCULATION OF INDICES ##################################

    // (A) ################################## EXTRACT SUMMARY INDICES FROM THE SIGNAL WAVEFORM ##################################
    // average absolute value over the minute recording - not useful
    // double[] avAbsolute = dspOutput1.Average;
    double[] signalEnvelope = dspOutput1.Envelope;
    double avgSignalEnvelope = signalEnvelope.Average();

    // 10 times log of amplitude squared.
    // NOTE(review): this is evaluated before the zero-signal guard below, so a zero average envelope
    // stores Math.Log10(0) * 20 in the returned result - confirm that is acceptable to consumers.
    summaryIndices.AvgSignalAmplitude = 20 * Math.Log10(avgSignalEnvelope);

    // Deal with case where the signal waveform is continuous flat with values < 0.0001. Has happened!!
    // Although signal appears zero, this condition is required.
    if (avgSignalEnvelope < 0.0001)
    {
        // The message reports the same threshold that the condition above tests.
        Logger.Debug("Segment skipped because avSignalEnvelope is < 0.0001!");
        summaryIndices.ZeroSignal = 1.0;
        return result;
    }

    // i. Check for clipping and high amplitude rates per second
    summaryIndices.HighAmplitudeIndex = dspOutput1.HighAmplitudeCount / subsegmentDurationInSeconds;
    summaryIndices.ClippingIndex = dspOutput1.ClipCount / subsegmentDurationInSeconds;

    // ii. Calculate bg noise in dB
    //     Convert signal envelope to dB and subtract background noise. Default noise SD to calculate threshold = ZERO
    double signalBgn = NoiseRemovalModal.CalculateBackgroundNoise(dspOutput2.Envelope);
    summaryIndices.BackgroundNoise = signalBgn;

    // iii: FRAME ENERGIES - convert signal to decibels and subtract background noise.
    double[] dBEnvelope = SNR.Signal2Decibels(dspOutput1.Envelope);
    double[] dBEnvelopeSansNoise = SNR.SubtractAndTruncate2Zero(dBEnvelope, signalBgn);

    // iv: ACTIVITY for NOISE REDUCED SIGNAL ENVELOPE
    //     Calculate fraction of frames having acoustic activity
    var activity = ActivityAndCover.CalculateActivity(dBEnvelopeSansNoise, frameStepTimeSpan);
    summaryIndices.Activity = activity.FractionOfActiveFrames;

    // v. average number of events per second whose duration > one frame
    //    average event duration in milliseconds - no longer calculated
    //summaryIndices.AvgEventDuration = activity.avEventDuration;
    summaryIndices.EventsPerSecond = activity.EventCount / subsegmentDurationInSeconds;

    // vi. Calculate SNR and active frames SNR
    summaryIndices.Snr = dBEnvelopeSansNoise.Max();
    summaryIndices.AvgSnrOfActiveFrames = activity.ActiveAvDb;

    // vii. ENTROPY of ENERGY ENVELOPE -- 1-Ht because want measure of concentration of acoustic energy.
    double entropy = DataTools.EntropyNormalised(DataTools.SquareValues(signalEnvelope));
    summaryIndices.TemporalEntropy = 1 - entropy;

    // Note that the spectrogram has had the DC bin removed. i.e. has only 256 columns.
    double[,] amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram; // get amplitude spectrogram.

    // CALCULATE various NDSI (Normalised difference soundscape Index) FROM THE AMPLITUDE SPECTROGRAM
    // These options proved to be highly correlated. Therefore only use tuple.Item1 which derived from Power Spectral Density.
    var tuple3 = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(amplitudeSpectrogram);
    summaryIndices.Ndsi = SpectrogramTools.CalculateNdsi(tuple3.Item1, sampleRate, 1000, 2000, 8000);

    // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
    // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(subsegmentRecording, frameLength, sampleLength, sensitivity);

    // double length of the vector because want to work with 256 element vector for LDFC purposes
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

    // i: CALCULATE SPECTRUM OF THE SUM OF FREQ BIN AMPLITUDES - used for later calculation of ACI
    spectralIndices.SUM = MatrixTools.SumColumns(amplitudeSpectrogram);

    // Calculate lower and upper boundary bin ids.
    // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic noise. These biased index values away from biophony.
    // Boundary of upper bird-band is to avoid high freq artefacts due to mp3.
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);

    // calculate number of freq bins in the bird-band.
    int midBandBinCount = middleBinBound - lowerBinBound + 1;

    if (octaveScale)
    {
        // the above frequency bin bounds do not apply with octave scale. Need to recalculate them suitable for Octave scale recording.
        lowFreqBound = freqScale.LinearBound;
        lowerBinBound = freqScale.GetBinIdForHerzValue(lowFreqBound);

        midFreqBound = 8000; // This value appears suitable for Jasco Marine recordings. Not much happens above 8kHz.

        //middleBinBound = freqScale.GetBinIdForHerzValue(midFreqBound);
        middleBinBound = freqScale.GetBinIdInReducedSpectrogramForHerzValue(midFreqBound);
        midBandBinCount = middleBinBound - lowerBinBound + 1;
    }

    // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
    // original sample rate can be anything 11.0-44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;

    // if upsampling has been done
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that binwidth does not change
    }

    // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    spectralIndices.DIF = AcousticComplexityIndex.SumOfAmplitudeDifferences(amplitudeSpectrogram);

    double[] aciSpectrum = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);
    spectralIndices.ACI = aciSpectrum;

    // remove low freq band of ACI spectrum and store average ACI value
    double[] reducedAciSpectrum = DataTools.Subarray(aciSpectrum, lowerBinBound, midBandBinCount);
    summaryIndices.AcousticComplexity = reducedAciSpectrum.Average();

    // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
    double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int i = 0; i < temporalEntropySpectrum.Length; i++)
    {
        temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
    }

    spectralIndices.ENT = temporalEntropySpectrum;

    // iv: remove background noise from the amplitude spectrogram
    //     First calculate the noise profile from the amplitude spectrogram
    double[] spectralAmplitudeBgn = NoiseProfile.CalculateBackgroundNoise(dspOutput2.AmplitudeSpectrogram);
    amplitudeSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(amplitudeSpectrogram, spectralAmplitudeBgn);

    // AMPLITUDE THRESHOLD for smoothing background, nhThreshold, assumes background noise ranges around -40dB.
    // This value corresponds to approximately 6dB above background.
    amplitudeSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(amplitudeSpectrogram, nhThreshold: 0.015);
    ////ImageTools.DrawMatrix(spectrogramData, @"C:\SensorNetworks\WavFiles\Crows\image.png", false);
    ////DataTools.writeBarGraph(modalValues);

    result.AmplitudeSpectrogram = amplitudeSpectrogram;

    // v: ENTROPY OF AVERAGE SPECTRUM & VARIANCE SPECTRUM - at this point the spectrogram is a noise reduced amplitude spectrogram
    var tuple = AcousticEntropy.CalculateSpectralEntropies(amplitudeSpectrogram, lowerBinBound, midBandBinCount);

    // ENTROPY of spectral averages - Reverse the values i.e. calculate 1-Hs and 1-Hv, and 1-Hcov for energy concentration
    summaryIndices.EntropyOfAverageSpectrum = 1 - tuple.Item1;

    // ENTROPY of spectrum of Variance values
    summaryIndices.EntropyOfVarianceSpectrum = 1 - tuple.Item2;

    // ENTROPY of spectrum of Coefficient of Variation values
    summaryIndices.EntropyOfCoVSpectrum = 1 - tuple.Item3;

    // vi: ENTROPY OF DISTRIBUTION of maximum SPECTRAL PEAKS.
    //     First extract High band SPECTROGRAM which is now noise reduced
    double entropyOfPeaksSpectrum = AcousticEntropy.CalculateEntropyOfSpectralPeaks(amplitudeSpectrogram, lowerBinBound, middleBinBound);
    summaryIndices.EntropyOfPeaksSpectrum = 1 - entropyOfPeaksSpectrum;

    // ######################################################################################################################################################
    // (D) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

    // i: Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput2.AmplitudeSpectrogram, dspOutput2.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);
    spectralIndices.BGN = spectralDecibelBgn;

    // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
    //     REUSE the var deciBelSpectrogram but this time using dspOutput1.
    deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    deciBelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, spectralDecibelBgn);
    deciBelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(deciBelSpectrogram, nhThreshold: 2.0);

    // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(deciBelSpectrogram);

    // iv: CALCULATE SPECTRAL COVER.
    //     NOTE: at this point, deciBelSpectrogram is noise reduced. All values >= 0.0
    //           FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb; // dB THRESHOLD for calculating spectral coverage
    var spActivity = ActivityAndCover.CalculateSpectralEvents(deciBelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);
    spectralIndices.CVR = spActivity.CoverSpectrum;
    spectralIndices.EVN = spActivity.EventSpectrum;

    summaryIndices.HighFreqCover = spActivity.HighFreqBandCover;
    summaryIndices.MidFreqCover = spActivity.MidFreqBandCover;
    summaryIndices.LowFreqCover = spActivity.LowFreqBandCover;

    // ######################################################################################################################################################

    // v: CALCULATE SPECTRAL PEAK TRACKS and RIDGE indices.
    //    NOTE: at this point, the var deciBelSpectrogram is noise reduced. i.e. all its values >= 0.0
    //    Detecting ridges or spectral peak tracks requires using a 5x5 mask which has edge effects.
    //    This becomes significant if we have a short indexCalculationDuration.
    //    Consequently if the indexCalculationDuration < 10 seconds then we revert back to the recording and cut out a recording segment that includes
    //    a buffer for edge effects. In most cases however, we can just use the decibel spectrogram already calculated and ignore the edge effects.
    double peakThreshold = 6.0; //dB
    SpectralPeakTracks sptInfo;
    if (indexCalculationDuration.TotalSeconds < 10.0)
    {
        // calculate a new decibel spectrogram
        sptInfo = SpectralPeakTracks.CalculateSpectralPeakTracks(recording, startSample, endSample, frameSize, octaveScale, peakThreshold);
    }
    else
    {
        // use existing decibel spectrogram
        sptInfo = new SpectralPeakTracks(deciBelSpectrogram, peakThreshold);
    }

    spectralIndices.SPT = sptInfo.SptSpectrum;
    spectralIndices.RHZ = sptInfo.RhzSpectrum;
    spectralIndices.RVT = sptInfo.RvtSpectrum;
    spectralIndices.RPS = sptInfo.RpsSpectrum;
    spectralIndices.RNG = sptInfo.RngSpectrum;
    summaryIndices.SptDensity = sptInfo.TrackDensity;

    // these are two other indices that I tried but they do not seem to add anything of interest.
    //summaryIndices.AvgSptDuration = sptInfo.AvTrackDuration;
    //summaryIndices.SptPerSecond = sptInfo.TotalTrackCount / subsegmentSecondsDuration;

    // ######################################################################################################################################################

    // vi: CLUSTERING - FIRST DETERMINE IF IT IS WORTH DOING
    // return if (activeFrameCount too small || eventCount == 0 || short index calc duration) because no point doing clustering
    if (activity.ActiveFrameCount <= 2 || Math.Abs(activity.EventCount) < 0.01 || indexCalculationDuration.TotalSeconds < 15)
    {
        // IN ADDITION return if indexCalculationDuration < 15 seconds because no point doing clustering on short time segment
        // NOTE: Activity was calculated with 3dB threshold AFTER backgroundnoise removal.
        //summaryIndices.AvgClusterDuration = TimeSpan.Zero;
        summaryIndices.ClusterCount = 0;
        summaryIndices.ThreeGramCount = 0;
        return result;
    }

    // YES WE WILL DO CLUSTERING! to determine cluster count (spectral diversity) and spectral persistence.
    // Only use midband decibel SPECTRUM. In June 2016, the mid-band (i.e. the bird-band) was set to lowerBound=1000Hz, upperBound=8000hz.
    // Actually do clustering of binary spectra. Must first threshold
    double binaryThreshold = SpectralClustering.DefaultBinaryThresholdInDecibels;
    var midBandSpectrogram = MatrixTools.Submatrix(deciBelSpectrogram, 0, lowerBinBound, deciBelSpectrogram.GetLength(0) - 1, middleBinBound);
    var clusterInfo = SpectralClustering.ClusterTheSpectra(midBandSpectrogram, lowerBinBound, middleBinBound, binaryThreshold);

    // Store two summary index values from cluster info
    summaryIndices.ClusterCount = clusterInfo.ClusterCount;
    summaryIndices.ThreeGramCount = clusterInfo.TriGramUniqueCount;

    // As of May 2017, no longer store clustering results superimposed on spectrogram.
    // If you want to see this, then call the TEST methods in class SpectralClustering.cs.

    // #######################################################################################################################################################

    // vii: set up other info to return
    var freqPeaks = SpectralPeakTracks.ConvertSpectralPeaksToNormalisedArray(deciBelSpectrogram);
    var scores = new List<Plot>
    {
        new Plot("Decibels", DataTools.normalise(dBEnvelopeSansNoise), ActivityAndCover.DefaultActivityThresholdDb),
        new Plot("Active Frames", DataTools.Bool2Binary(activity.ActiveFrames), 0.0),
        new Plot("Max Frequency", freqPeaks, 0.0), // relative location of freq maxima in spectra
    };

    result.Hits = sptInfo.Peaks;
    result.TrackScores = scores;

    return result;
} // end Calculation of Summary and Spectral Indices