// #############################################################################################################################
// ################################# FOUR DIFFERENT METHODS TO CALCULATE THE BACKGROUND NOISE PROFILE
//
// (1) MODAL METHOD
// (2) LOWEST PERCENTILE FRAMES METHOD
// (3) BIN-WISE LOWEST PERCENTILE CELLS METHOD
// (4) FIRST N FRAMES
// ##################

/// <summary>
/// (1) MODAL METHOD
/// Treats the passed matrix as a spectrogram (rows=frames, cols=freq bins) and hands each column,
/// one at a time, to SNR.CalculateModalBackgroundNoiseInSignal.
/// The per-bin results are gathered into a profile holding one value per freq bin.
/// </summary>
/// <param name="matrix">the spectrogram with origin top-left</param>
/// <param name="sdCount">number of standard deviations; passed unchanged to the per-bin calculation</param>
/// <returns>a noise profile whose arrays are all indexed by frequency bin</returns>
public static NoiseProfile CalculateModalNoiseProfile(double[,] matrix, double sdCount)
{
    int binCount = matrix.GetLength(1);

    var modes = new double[binCount];
    var deviations = new double[binCount];
    var thresholds = new double[binCount];
    var minima = new double[binCount];
    var maxima = new double[binCount];

    for (int bin = 0; bin < binCount; bin++)
    {
        // one column of the spectrogram = the time series of a single frequency bin
        double[] binValues = MatrixTools.GetColumn(matrix, bin);
        SNR.BackgroundNoise noise = SNR.CalculateModalBackgroundNoiseInSignal(binValues, sdCount);

        modes[bin] = noise.NoiseMode;
        deviations[bin] = noise.NoiseSd;
        thresholds[bin] = noise.NoiseThreshold;
        minima[bin] = noise.MinDb;
        maxima[bin] = noise.MaxDb;
    }

    return new NoiseProfile()
    {
        NoiseMode = modes,
        NoiseSd = deviations,
        NoiseThresholds = thresholds,
        MinDb = minima,
        MaxDb = maxima,
    };
}
/// <summary>
/// Estimates background noise with the algorithm of Lamel et al, 1981.
/// IMPORTANT: The passed signal envelope values are absolute amplitude values derived from the framed waveform.
/// They are converted to decibels here before being handed to the LAMEL method.
/// NOTE: Only the modal noise value is returned; the other three outputs of the algorithm
/// (including the SD part of the Gaussian noise model) are discarded.
/// </summary>
/// <param name="signalEnvelope">Amplitude values</param>
/// <returns>Modal noise value in decibels</returns>
public static double CalculateBackgroundNoise(double[] signalEnvelope)
{
    double[] envelopeInDecibels = SNR.Signal2Decibels(signalEnvelope);

    double modalNoise;
    CalculateNoiseUsingLamelsAlgorithm(envelopeInDecibels, out double _, out double _, out modalNoise, out double _);
    return modalNoise;
}
//##################################################################################################################################

/// <summary>
/// Converts a spectrogram matrix into a matrix of cepstral acoustic vectors.
/// Processing order: (i) filter bank (mel or linear), (ii) amplitudes to decibels, (iii) noise reduction,
/// (iv) cepstral coefficients, (v) normalisation, (vi) assembly of the full acoustic vectors.
/// NOTE!!!! The decibel array has been normalised in 0 - 1.
/// </summary>
/// <param name="config">supplies epsilon, window power, band limits, noise reduction settings and the MFCC settings</param>
/// <param name="matrix">the spectrogram to be converted</param>
/// <param name="decibels">per-frame decibel values, passed through to MFCCStuff.AcousticVectors</param>
/// <param name="sampleRate">sample rate of the recording; the Nyquist used here is half of this</param>
/// <returns>Item1 = matrix of acoustic vectors; Item2 = the second item returned by SNR.NoiseReduce</returns>
/// <exception cref="Exception">when the filter bank count exceeds the number of FFT bins</exception>
protected static Tuple<double[,], double[]> MakeCepstrogram(SonogramConfig config, double[,] matrix, double[] decibels, int sampleRate)
{
    int nyquist = sampleRate / 2;
    double epsilon = config.epsilon;
    bool includeDelta = config.mfccConfig.IncludeDelta;
    bool includeDoubleDelta = config.mfccConfig.IncludeDoubleDelta;

    //(i) APPLY FILTER BANK
    int bandCount = config.mfccConfig.FilterbankCount;
    bool doMelScale = config.mfccConfig.DoMelScale;
    int ccCount = config.mfccConfig.CcCount;
    int fftBinCount = config.FreqBinCount; //number of Hz bands = 2^N +1. Subtract DC bin
    int minHz = config.MinFreqBand ?? 0;
    int maxHz = config.MaxFreqBand ?? nyquist;

    Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1));

    // a filter bank cannot have more bands than there are FFT bins to draw from
    if (bandCount > fftBinCount)
    {
        throw new Exception(
            "## FATAL ERROR in BaseSonogram.MakeCepstrogram():- Can't calculate cepstral coeff. FilterbankCount > FFTbins. (" + bandCount + " > " + fftBinCount + ")\n\n");
    }

    // bandCount is the filter count for full bandwidth 0-Nyquist.
    // This number is trimmed proportionately to fit the required bandwidth.
    double[,] filtered = doMelScale
        ? MFCCStuff.MelFilterBank(matrix, bandCount, nyquist, minHz, maxHz) // using the Greg integral
        : MFCCStuff.LinearFilterBank(matrix, bandCount, nyquist, minHz, maxHz);

    Log.WriteIfVerbose("\tDim after filter bank=" + filtered.GetLength(1) + " (Max filter bank=" + bandCount + ")");

    //(ii) CONVERT AMPLITUDES TO DECIBELS
    double[,] decibelSpectra = MFCCStuff.DecibelSpectra(filtered, config.WindowPower, sampleRate, epsilon);

    //(iii) NOISE REDUCTION
    var noiseReduced = SNR.NoiseReduce(decibelSpectra, config.NoiseReductionType, config.NoiseReductionParameter);

    //(iv) calculate cepstral coefficients
    double[,] cepstra = MFCCStuff.Cepstra(noiseReduced.Item1, ccCount);

    //(v) NormaliseMatrixValues
    double[,] normalised = DataTools.normalise(cepstra);

    //(vi) Calculate the full range of MFCC coefficients ie including decibel and deltas, etc
    double[,] acousticVectors = MFCCStuff.AcousticVectors(normalised, decibels, includeDelta, includeDoubleDelta);

    // return matrix and full bandwidth modal noise profile
    return Tuple.Create(acousticVectors, noiseReduced.Item2);
}
/// <summary>
/// Checks a linear frequency scale that is built from explicit nyquist, frame size and hertz interval values.
/// The test draws an annotated, noise-reduced spectrogram of the test recording, serialises the scale's
/// grid line locations to JSON, compares that file with the stored EXPECTED file and finally
/// checks the dimensions of the image.
/// </summary>
public void LinearFrequencyScale()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var opFileStem = "BAC2_20071008";
    var outputDir = this.outputDirectory;
    var outputImagePath = Path.Combine(outputDir.FullName, "LinearScaleSonogram.png");

    var recording = new AudioRecording(recordingPath);

    // specified linear scale
    int nyquist = 11025;
    int frameSize = 1024;
    int hertzInterval = 1000;
    var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
    image.Save(outputImagePath, ImageFormat.Png);

    // DO FILE EQUALITY TEST
    var stemOfExpectedFile = opFileStem + "_LinearScaleGridLineLocations.EXPECTED.json";
    var stemOfActualFile = opFileStem + "_LinearScaleGridLineLocations.ACTUAL.json";

    // Check that freqScale.GridLineLocations are correct.
    // The asset is resolved from separate path segments (as for the recording above) so that
    // no directory separator is hard-coded into the path.
    var expectedFile1 = PathHelper.ResolveAsset("FrequencyScale", stemOfExpectedFile);
    if (!expectedFile1.Exists)
    {
        LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
        LoggedConsole.WriteErrorLine(
            $"If ACTUAL results file is correct, move it to dir `{PathHelper.TestResources}` and change its suffix to <.EXPECTED.json>");
    }

    var resultFile1 = new FileInfo(Path.Combine(outputDir.FullName, stemOfActualFile));
    Json.Serialise(resultFile1, freqScale.GridLineLocations);
    FileEqualityHelpers.TextFileEqual(expectedFile1, resultFile1);

    // Check that image dimensions are correct
    Assert.AreEqual(566, image.Height);
    Assert.AreEqual(1621, image.Width);
}
/// <summary>
/// Takes an audio recording and returns an octave scale spectrogram.
/// At the present time it only works for recordings with 64000 sample rate and returns a 256 bin sonogram.
/// TODO: generalise this method for other recordings and octave scales.
/// </summary>
/// <param name="recording">the recording from which the amplitude sonogram is made</param>
/// <param name="fst">the frequency scale type used to construct the frequency scale</param>
/// <returns>the noise-reduced, octave-scaled sonogram with its window size and step reset to match the final bin count</returns>
public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording recording, FreqScaleType fst)
{
    const double windowOverlap = 0.75;
    var freqScale = new FrequencyScale(fst);

    var config = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = windowOverlap,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // Generate amplitude sonogram and then convert to octave scale
    var sonogram = new AmplitudeSonogram(config, recording.WavReader);

    // THIS IS THE CRITICAL LINE.
    // TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method
    sonogram.Data = ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // DO NOISE REDUCTION
    sonogram.Data = SNR.NoiseReduce_Standard(sonogram.Data);

    // after the conversion the configuration is rewritten in terms of the final bin count
    int finalWindowSize = freqScale.FinalBinCount * 2;
    sonogram.Configuration.WindowSize = finalWindowSize;
    sonogram.Configuration.WindowStep = (int)Math.Round(finalWindowSize * (1 - windowOverlap));
    return sonogram;
}
/// <summary>
/// Use this method when want to match defined shape in target using cross-correlation.
/// This was the method used by Stewart Gage.
/// First set target and source to same dynamic range.
/// Then normalise target and source to unit-length.
/// The score of a frame is the dot product of the unit-length target with the unit-length
/// sub-matrix of the sonogram that starts at that frame.
/// </summary>
/// <param name="target">the template to search for; its row count sets the length of each compared window</param>
/// <param name="dynamicRange">dynamic range applied to both the target and each compared window</param>
/// <param name="sonogram">the spectrogram to be searched</param>
/// <param name="segments">the segments of the recording to be searched; when null, nothing is searched</param>
/// <param name="minHz">lower bound of the searched frequency band</param>
/// <param name="maxHz">upper bound of the searched frequency band</param>
/// <param name="minDuration">not used by this method</param>
/// <returns>a one-tuple holding a score per sonogram frame (zero where no search was made), or null when segments is null</returns>
public static Tuple<double[]> Execute_StewartGage(double[,] target, double dynamicRange, SpectrogramStandard sonogram, List<AcousticEvent> segments, int minHz, int maxHz, double minDuration)
{
    Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
    if (segments == null)
    {
        return null;
    }

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);
    int targetLength = target.GetLength(0);

    //adjust target's dynamic range to that set by user
    target = SNR.SetDynamicRange(target, 0.0, dynamicRange); //set event's dynamic range
    double[] v1 = DataTools.Matrix2Array(target);
    v1 = DataTools.normalise2UnitLength(v1);

    int frameCount = sonogram.FrameCount;
    double[] scores = new double[frameCount];

    // Exclusive upper bound on any start row: a window of targetLength rows must end within the sonogram.
    int maxStopRow = frameCount - targetLength + 1;

    foreach (AcousticEvent av in segments)
    {
        Log.WriteLine("SEARCHING SEGMENT.");
        int startRow = (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond);
        int endRow = (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond);
        if (endRow >= frameCount)
        {
            endRow = frameCount;
        }

        int stopRow = endRow - targetLength;
        if (stopRow <= startRow)
        {
            stopRow = startRow + 1; //want minimum of one row
        }

        // The forced single row above can start so late that the window would overrun the last frame.
        // Clamp so that such rows are skipped instead of indexing outside the sonogram.
        if (stopRow > maxStopRow)
        {
            stopRow = maxStopRow;
        }

        for (int r = startRow; r < stopRow; r++)
        {
            double[,] matrix = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
            matrix = SNR.SetDynamicRange(matrix, 0.0, dynamicRange); //set event's dynamic range

            double[] v2 = DataTools.Matrix2Array(matrix);
            v2 = DataTools.normalise2UnitLength(v2);
            scores[r] = DataTools.DotProduct(v1, v2); //the Cross Correlation
        } // end of rows in segment
    } // foreach (AcousticEvent av in segments)

    return Tuple.Create(scores);
}
/// <summary>
/// Checks the default linear frequency scale.
/// The test draws an annotated, noise-reduced spectrogram of the test recording, serialises the scale's
/// grid line locations to JSON, compares that file with the stored EXPECTED file and finally
/// checks the dimensions of the image.
/// </summary>
public void LinearFrequencyScaleDefault()
{
    // relative path because post-Build command transfers files to ...\\Work\GitHub\...\bin\Debug subfolder.
    // Path.Combine is used so that the directory separator is not hard-coded.
    var recordingPath = Path.Combine("Recordings", "BAC2_20071008-085040.wav");
    var opFileStem = "BAC2_20071008";
    var outputDir = this.outputDirectory;
    var outputImagePath = Path.Combine(outputDir.FullName, "DefaultLinearScaleSonogram.png");

    var recording = new AudioRecording(recordingPath);

    // default linear scale
    var fst = FreqScaleType.Linear;
    var freqScale = new FrequencyScale(fst);

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
    image.Save(outputImagePath, ImageFormat.Png);

    // DO UNIT TESTING
    var stemOfExpectedFile = opFileStem + "_DefaultLinearScaleGridLineLocations.EXPECTED.json";
    var stemOfActualFile = opFileStem + "_DefaultLinearScaleGridLineLocations.ACTUAL.json";

    // Check that freqScale.GridLineLocations are correct
    var expectedFile1 = new FileInfo(Path.Combine("FrequencyScale", stemOfExpectedFile));
    if (!expectedFile1.Exists)
    {
        LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
        LoggedConsole.WriteErrorLine("If ACTUAL results file is correct, move it to dir <...\\TestResources\\FrequencyScale> and change its suffix to <.EXPECTED.json>");
    }

    var resultFile1 = new FileInfo(Path.Combine(outputDir.FullName, stemOfActualFile));
    Json.Serialise(resultFile1, freqScale.GridLineLocations);
    FileEqualityHelpers.TextFileEqual(expectedFile1, resultFile1);

    // Check that image dimensions are correct
    Assert.AreEqual(310, image.Height);
    Assert.AreEqual(3247, image.Width);
}
/// <summary>
/// Checks a linear frequency scale that is built from explicit nyquist, frame size and hertz interval values.
/// The test draws an annotated, noise-reduced spectrogram of the test recording, compares the scale's
/// grid line locations with a hard-coded matrix of expected values and checks the dimensions of the image.
/// </summary>
public void LinearFrequencyScale()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");

    // This test draws the specified (not the default) linear scale, so the image is named accordingly
    // and cannot be confused with the image written by the default-scale test.
    var outputImagePath = this.outputDirectory.CombineFile("LinearScaleSonogram.png");

    var recording = new AudioRecording(recordingPath);

    // specified linear scale
    int nyquist = 11025;
    int frameSize = 1024;
    int hertzInterval = 1000;
    var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // Check that freqScale.GridLineLocations are correct.
    // The second column holds the grid line frequencies in steps of hertzInterval.
    var expected = new[,]
    {
        { 46, 1000 },
        { 92, 2000 },
        { 139, 3000 },
        { 185, 4000 },
        { 232, 5000 },
        { 278, 6000 },
        { 325, 7000 },
        { 371, 8000 },
        { 417, 9000 },
        { 464, 10000 },
        { 510, 11000 },
    };

    Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations);

    // Check that image dimensions are correct
    Assert.AreEqual(566, image.Height);
    Assert.AreEqual(1621, image.Width);
}
/// <summary>
/// Draws a decibel spectrogram together with three intensity plots (2-3, 3-4 and 4-5 kHz)
/// and three acoustic events, saves the image for visual confirmation and checks its dimensions.
/// </summary>
public void TestAnnotatedSonogramWithPlots()
{
    // Make a decibel spectrogram
    var actualDecibelSpectrogram = new SpectrogramStandard(this.sonoConfig, this.recording.WavReader);

    // Normalisation bounds shared by the three plots.
    // A normalised threshold of 0.25 over the 50 dB range corresponds to 12.5 dB above -100 dB.
    double minDecibels = -100.0;
    double maxDecibels = -50;
    var normThreshold = 0.25;

    // Builds the plot of normalised average intensity for one frequency band.
    Plot MakeBandPlot(string title, int minHz, int maxHz)
    {
        var bandDecibels = SNR.CalculateFreqBandAvIntensity(actualDecibelSpectrogram.Data, minHz, maxHz, actualDecibelSpectrogram.NyquistFrequency);
        var normalisedIntensity = DataTools.NormaliseInZeroOne(bandDecibels, minDecibels, maxDecibels);
        return new Plot(title, normalisedIntensity, normThreshold);
    }

    // combine the plots (constructed in the order plot 1, plot 2, plot 3)
    var plots = new List<Plot>
    {
        MakeBandPlot("Intensity 2-3 kHz", 2000, 3000),
        MakeBandPlot("Intensity 3-4 kHz", 3000, 4000),
        MakeBandPlot("Intensity 4-5 kHz", 4000, 5000),
    };

    // create three events
    var startOffset = TimeSpan.Zero;
    var events = new List<AcousticEvent>
    {
        new AcousticEvent(startOffset, 10.0, 10.0, 2000, 3000),
        new AcousticEvent(startOffset, 25.0, 10.0, 3000, 4000),
        new AcousticEvent(startOffset, 40.0, 10.0, 4000, 5000),
    };

    var image = SpectrogramTools.GetSonogramPlusCharts(actualDecibelSpectrogram, events, plots, null);

    // create the image for visual confirmation
    var imagePath = Path.Combine(this.outputDirectory.FullName, this.recording.BaseName + ".png");
    image.Save(imagePath);

    Assert.AreEqual(1621, image.Width);
    Assert.AreEqual(647, image.Height);
}
/// <summary>
/// Checks the default linear frequency scale.
/// The test draws an annotated, noise-reduced spectrogram of the test recording, compares the scale's
/// grid line locations with a hard-coded matrix of expected values and checks the dimensions of the image.
/// </summary>
public void LinearFrequencyScaleDefault()
{
    var audioFile = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var imageFile = this.outputDirectory.CombineFile("DefaultLinearScaleSonogram.png");
    var recording = new AudioRecording(audioFile);

    // default linear scale
    var fst = FreqScaleType.Linear;
    var freqScale = new FrequencyScale(fst);

    var config = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(config, recording.WavReader);
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    // DO NOISE REDUCTION
    sonogram.Data = SNR.NoiseReduce_Standard(sonogram.Data);

    var baseImage = sonogram.GetImage();
    var image = sonogram.GetImageFullyAnnotated(baseImage, "SPECTROGRAM: " + fst, freqScale.GridLineLocations);
    image.Save(imageFile);

    // Check that freqScale.GridLineLocations are correct.
    // The second column holds the grid line frequencies in steps of 1000.
    var expected = new[,]
    {
        { 23, 1000 },
        { 46, 2000 },
        { 69, 3000 },
        { 92, 4000 },
        { 116, 5000 },
        { 139, 6000 },
        { 162, 7000 },
        { 185, 8000 },
        { 208, 9000 },
        { 232, 10000 },
        { 255, 11000 },
    };

    Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations);

    // Check that image dimensions are correct
    Assert.AreEqual(310, image.Height);
    Assert.AreEqual(3247, image.Width);
}
} //Analysis()

/// <summary>
/// Scans successive bands of columns of the matrix for periodic (grating) patterns.
/// Each band is colStep columns wide. The row averages of the band form an amplitude array which is
/// noise reduced, scanned for grating patterns and converted into events.
/// Every event is stamped with the first and last column of its band.
/// </summary>
/// <param name="matrix">the matrix to be scanned; bands are taken over its columns</param>
/// <param name="colStep">the number of columns in each band</param>
/// <param name="intensityThreshold">threshold passed to Gratings.ExtractPeriodicEvents</param>
/// <returns>Item1 = the events from all bands; Item2 = the amplitude array of the band with index 3 (for debugging only), or null when there is no such band</returns>
public static Tuple<List<Dictionary<string, double>>, double[]> DetectGratingEvents(double[,] matrix, int colStep, double intensityThreshold)
{
    bool removeNoise = true;
    int minPeriod = 2; //both period values must be even numbers
    int maxPeriod = 20; //Note: 17.2 frames per second i.e. period=20 is just over 1s.
    int numberOfCycles = 4;
    int step = 1;

    int lastRow = matrix.GetLength(0) - 1;
    int bandTotal = matrix.GetLength(1) / colStep;

    var allEvents = new List<Dictionary<string, double>>();
    double[] debugArray = null;

    for (int band = 0; band < bandTotal; band++)
    {
        int firstCol = band * colStep;
        int lastCol = firstCol + colStep - 1;

        double[,] bandMatrix = MatrixTools.Submatrix(matrix, 0, firstCol, lastRow, lastCol);
        double[] amplitudes = MatrixTools.GetRowAverages(bandMatrix);

        if (removeNoise)
        {
            // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            double noiseSdCount = 0.1;
            SNR.BackgroundNoise noise = SNR.SubtractBackgroundNoiseFromSignal(amplitudes, noiseSdCount);
            amplitudes = noise.NoiseReducedSignal;
        }

        var periodScores = Gratings.ScanArrayForGratingPattern(amplitudes, minPeriod, maxPeriod, numberOfCycles, step);
        var merged = Gratings.MergePeriodicScoreArrays(periodScores, minPeriod, maxPeriod);
        double[] intensity = merged.Item1;
        double[] periodicity = merged.Item2;

        foreach (Dictionary<string, double> bandEvent in Gratings.ExtractPeriodicEvents(intensity, periodicity, intensityThreshold))
        {
            bandEvent[key_MIN_FREQBIN] = firstCol;
            bandEvent[key_MAX_FREQBIN] = lastCol;
            allEvents.Add(bandEvent);
        }

        if (band == 3)
        {
            debugArray = amplitudes; //returned for debugging purposes only
        }
    } //for loop over bands of columns

    return Tuple.Create(allEvents, debugArray);
} //end DetectGratingEvents()
/// <summary>
/// Converts the passed spectrogram to decibels, noise reduces it using the configured noise reduction
/// settings and returns the result of Sobel edge detection on the noise-reduced matrix.
/// Side effect: the second item returned by the noise reduction is stored as this.SnrData.ModalNoiseProfile.
/// </summary>
/// <param name="matrix">the spectrogram to be converted</param>
/// <returns>the matrix returned by ImageTools.SobelEdgeDetection</returns>
private double[,] SobelEdgegram(double[,] matrix)
{
    var config = this.Configuration;

    // from spectrogram to decibel spectra
    double[,] decibelSpectra = MFCCStuff.DecibelSpectra(matrix, config.WindowPower, this.SampleRate, config.epsilon);

    //NOISE REDUCTION
    var noiseReduced = SNR.NoiseReduce(decibelSpectra, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter);
    this.SnrData.ModalNoiseProfile = noiseReduced.Item2;

    double[,] edges = ImageTools.SobelEdgeDetection(noiseReduced.Item1);
    return edges;
}
/// <summary>
/// Makes a temporary, resampled copy of the source recording, generates the spectrogram images from it
/// and calculates the SNR statistics of the event bounded by the passed times and frequencies.
/// The temporary copy is removed before returning, whether or not the analysis succeeds.
/// </summary>
/// <param name="sourceRecording">the recording to be analysed</param>
/// <param name="configDict">the configuration; the recording's full name and file name are written into it</param>
/// <param name="localEventStart">start of the event</param>
/// <param name="localEventEnd">end of the event</param>
/// <param name="minHz">lower frequency bound of the event</param>
/// <param name="maxHz">upper frequency bound of the event</param>
/// <param name="outDirectory">directory for the output images; the temporary audio file is also created here</param>
/// <returns>the result of the spectrogram generation with its SnrStatistics set</returns>
public static AudioToSonogramResult AnalyseOneRecording(
    FileInfo sourceRecording,
    Dictionary<string, string> configDict,
    TimeSpan localEventStart,
    TimeSpan localEventEnd,
    int minHz,
    int maxHz,
    DirectoryInfo outDirectory)
{
    // set a threshold for determining energy distribution in call
    // NOTE: value of this threshold depends on whether working with decibel, energy or amplitude values
    const double threshold = 9.0;

    // the config entry, when present, overrides the default resample rate (single dictionary lookup)
    int resampleRate = AppConfigHelper.DefaultTargetSampleRate;
    if (configDict.TryGetValue(AnalysisKeys.ResampleRate, out string resampleRateText))
    {
        resampleRate = int.Parse(resampleRateText);
    }

    configDict[ConfigKeys.Recording.Key_RecordingCallName] = sourceRecording.FullName;
    configDict[ConfigKeys.Recording.Key_RecordingFileName] = sourceRecording.Name;

    // 1: GET RECORDING and make temporary copy
    // put temp audio FileSegment in same directory as the required output image.
    var tempAudioSegment = TempFileHelper.NewTempFile(outDirectory, "wav");

    // delete the temp audio file if it already exists.
    if (File.Exists(tempAudioSegment.FullName))
    {
        File.Delete(tempAudioSegment.FullName);
    }

    try
    {
        // This line creates a temporary version of the source file downsampled as per entry in the config file
        MasterAudioUtility.SegmentToWav(sourceRecording, tempAudioSegment, new AudioUtilityRequest() { TargetSampleRate = resampleRate });

        // 2: Generate sonogram image files
        AudioToSonogramResult result = GenerateSpectrogramImages(tempAudioSegment, configDict, outDirectory);

        // 3: GET the SNR statistics
        TimeSpan eventDuration = localEventEnd - localEventStart;
        result.SnrStatistics = SNR.Calculate_SNR_ShortRecording(tempAudioSegment, configDict, localEventStart, eventDuration, minHz, maxHz, threshold);

        return result;
    }
    finally
    {
        // 4: Delete the temp file.
        // Done in a finally block so that the file is not left behind when a step above throws.
        // The existence check stops a clean-up failure from masking the original exception.
        if (File.Exists(tempAudioSegment.FullName))
        {
            File.Delete(tempAudioSegment.FullName);
        }
    }
}
/// <summary>
/// Makes a spectrogram of the recording and extracts from it the event bounded by the passed times and frequencies.
/// The event is extracted twice: once from the original spectrogram and once after noise removal.
/// </summary>
/// <param name="recording">the recording containing the event</param>
/// <param name="eventStart">start of the event</param>
/// <param name="eventEnd">end of the event</param>
/// <param name="minHz">lower frequency bound of the event</param>
/// <param name="maxHz">upper frequency bound of the event</param>
/// <param name="frameOverlap">the window overlap used to make the spectrogram</param>
/// <param name="backgroundThreshold">threshold passed to SNR.RemoveNeighbourhoodBackgroundNoise</param>
/// <param name="segmentStartOffset">passed to the constructor of the returned acoustic event</param>
/// <returns>the sonogram (its data noise-removed), the acoustic event, the event before noise removal,
/// the modal noise of the event's frequency band and the event after noise removal</returns>
public static Tuple<BaseSonogram, AcousticEvent, double[,], double[], double[,]> Execute_Extraction(
    AudioRecording recording,
    double eventStart,
    double eventEnd,
    int minHz,
    int maxHz,
    double frameOverlap,
    double backgroundThreshold,
    TimeSpan segmentStartOffset)
{
    //ii: MAKE SONOGRAM
    // default config values except for source name and overlap
    var sonoConfig = new SonogramConfig();
    sonoConfig.SourceFName = recording.BaseName;
    sonoConfig.WindowOverlap = frameOverlap;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    Log.WriteLine(
        "Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f2}%, Offset={3:f1}ms, Frames/s={4:f1}",
        sonogram.Configuration.WindowSize,
        sonogram.FrameCount,
        sonogram.FrameDuration * 1000,
        sonogram.FrameStep * 1000,
        sonogram.FramesPerSecond,
        frameOverlap);

    int bottomBin = (int)(minHz / sonogram.FBinWidth);
    int topBin = (int)(maxHz / sonogram.FBinWidth);
    int binCount = topBin - bottomBin + 1;
    Log.WriteIfVerbose("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);

    //calculate the modal noise profile
    const double sdCount = 0.0; // number of noise standard deviations used to calculate noise threshold
    NoiseProfile noiseProfile = NoiseProfile.CalculateModalNoiseProfile(sonogram.Data, sdCount);
    double[] smoothedNoise = DataTools.filterMovingAverage(noiseProfile.NoiseMode, 7); //smooth the noise profile

    //extract modal noise values of the required event
    double[] noiseSubband = SpectrogramTools.ExtractModalNoiseSubband(smoothedNoise, minHz, maxHz, false, sonogram.NyquistFrequency, sonogram.FBinWidth);

    //extract data values of the required event - this must be done BEFORE sonogram.Data is overwritten below
    double[,] target = SpectrogramTools.ExtractEvent(sonogram.Data, eventStart, eventEnd, sonogram.FrameStep, minHz, maxHz, false, sonogram.NyquistFrequency, sonogram.FBinWidth);

    // create acoustic event with defined boundaries
    double eventDuration = eventEnd - eventStart;
    var acousticEvent = new AcousticEvent(segmentStartOffset, eventStart, eventDuration, minHz, maxHz);
    acousticEvent.SetTimeAndFreqScales(sonogram.FramesPerSecond, sonogram.FBinWidth);

    //truncate noise
    sonogram.Data = SNR.TruncateBgNoiseFromSpectrogram(sonogram.Data, smoothedNoise);
    sonogram.Data = SNR.RemoveNeighbourhoodBackgroundNoise(sonogram.Data, backgroundThreshold);

    // the same event, now taken from the noise-removed data
    double[,] targetMinusNoise = SpectrogramTools.ExtractEvent(sonogram.Data, eventStart, eventEnd, sonogram.FrameStep, minHz, maxHz, false, sonogram.NyquistFrequency, sonogram.FBinWidth);

    return Tuple.Create(sonogram, acousticEvent, target, noiseSubband, targetMinusNoise);
}
/// <summary>
/// METHOD TO CHECK IF SPECIFIED linear FREQ SCALE IS WORKING
/// Check it on standard one minute recording.
/// Draws an annotated spectrogram, writes the grid line locations of the scale to a file
/// and compares that file with the stored expected results file.
/// NOTE: the recording and output directories are hard-coded local paths.
/// </summary>
public static void TESTMETHOD_LinearFrequencyScale()
{
    var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC2_20071008-085040.wav";
    var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo();
    var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo();
    var outputImagePath = Path.Combine(outputDir.FullName, "linearScaleSonogram.png");
    var opFileStem = "BAC2_20071008";

    var recording = new AudioRecording(recordingPath);

    // specified linear scale
    const int nyquist = 11025;
    const int frameSize = 1024;
    const int hertzInterval = 1000;
    var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval);
    var scaleType = freqScale.ScaleType;

    // no noise reduction when the sonogram is constructed; it is done explicitly below
    var config = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(config, recording.WavReader);

    // DO NOISE REDUCTION
    sonogram.Data = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var title = "SPECTROGRAM: " + scaleType.ToString();
    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), title, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // DO FILE EQUALITY TEST
    string testName = "testName";
    var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyLinearScaleTest.EXPECTED.json"));
    var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyLinearScaleTestResults.json"));
    Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations);
    TestTools.FileEqualityTest(testName, resultFile, expectedTestFile);

    LoggedConsole.WriteLine("Completed Linear Frequency Scale test");
    Console.WriteLine("\n\n");
}
/// <summary>
/// 10-percentile Noise Reduction.
/// Calculates the 10-percentile noise profile of the matrix, smooths the profile's noise thresholds
/// with a moving average of width 7 and truncates that background noise from the matrix.
/// </summary>
/// <param name="matrix">the matrix to be noise reduced</param>
/// <returns>the matrix returned by SNR.TruncateBgNoiseFromSpectrogram</returns>
public static double[,] NoiseReduction(double[,] matrix)
{
    // calculate 10-percentile noise profile
    NoiseProfile percentileProfile = NoiseProfile.CalculatePercentileNoiseProfile(matrix, 10);

    // smooth the noise profile
    double[] smoothedThresholds = DataTools.filterMovingAverage(percentileProfile.NoiseThresholds, width: 7);

    return SNR.TruncateBgNoiseFromSpectrogram(matrix, smoothedThresholds);
}
/// <summary>
/// Makes a decibel spectrogram of the recording using non-overlapping frames (frame step = frame size)
/// and removes background noise from it in two passes: truncation of the background noise profile
/// followed by neighbourhood noise removal with a threshold of 3.0.
/// </summary>
/// <param name="recording">the recording supplying the samples, sample rate and epsilon</param>
/// <param name="frameSize">the frame size; also used as the frame step</param>
/// <returns>the noise-reduced decibel spectrogram</returns>
public static double[,] GetDecibelSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
{
    // get decibel spectrogram; passing frameSize as the step means the frames do not overlap
    var dspOutput = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameSize);
    var decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, recording.Epsilon);

    // remove background noise from spectrogram
    double[] backgroundNoise = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    var truncated = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, backgroundNoise);
    return SNR.RemoveNeighbourhoodBackgroundNoise(truncated, nhThreshold: 3.0);
}
/// <summary>
/// A FALSE-COLOUR VERSION OF DECIBEL SPECTROGRAM
/// Taken and adapted from Spectrogram Image 5 in the method of CLASS Audio2InputForConvCNN.cs:.
/// The image is composed from three sources: the normalised decibel spectrogram,
/// a normalised noise-reduced copy of it and the ridges detected in the original data.
/// </summary>
/// <param name="dbSpectrogramData">the sonogram data (NOT noise reduced). </param>
/// <returns>the false-colour image</returns>
public static Image<Rgb24> DrawStandardSpectrogramInFalseColour(double[,] dbSpectrogramData)
{
    // Do NOISE REDUCTION
    const double noiseReductionParameter = 2.0;
    var noiseReduced = SNR.NoiseReduce(dbSpectrogramData, NoiseReductionType.Standard, noiseReductionParameter);
    double[,] nrSpectrogramData = noiseReduced.Item1;

    // ridges are detected in the original (not noise reduced) data
    const double ridgeThreshold = 2.5;
    byte[,] hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(dbSpectrogramData, ridgeThreshold);

    // ################### RESEARCH QUESTION:
    // Different EXPERIMENTS IN NORMALISATION were tried, including re-adjusting min and max by a fraction (0.2)
    // of the data range to create the effect of contrast stretching, which enhances the spectrogram a bit.
    // ULTIMATELY THE BEST APPROACH APPEARED TO BE FIXED NORMALISATION BOUNDS
    const double filterCoefficient = 0.75;
    double[,] dbSpectrogramNorm = SpectrogramTools.NormaliseSpectrogramMatrix(dbSpectrogramData, -95.0, -30.0, filterCoefficient);

    // nr = noise reduced; this version is normalised with bounds of 0 and 50
    double[,] nrSpectrogramNorm = SpectrogramTools.NormaliseSpectrogramMatrix(nrSpectrogramData, 0, 50, filterCoefficient);
    nrSpectrogramNorm = MatrixTools.BoundMatrix(nrSpectrogramNorm, 0.0, 0.9);
    nrSpectrogramNorm = MatrixTools.SquareRootOfValues(nrSpectrogramNorm);
    nrSpectrogramNorm = DataTools.normalise(nrSpectrogramNorm);

    // create image from normalised data
    return SpectrogramTools.CreateFalseColourDecibelSpectrogramForZooming(dbSpectrogramNorm, nrSpectrogramNorm, hits);
}
/// <summary>
/// METHOD TO CHECK IF Octave FREQ SCALE IS WORKING
/// Check it on MARINE RECORDING from JASCO, SR=64000.
/// 24 BIT JASCO RECORDINGS from GBR must be converted to 16 bit.
/// ffmpeg -i source_file.wav -sample_fmt s16 out_file.wav
/// e.g. ". C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg\ffmpeg.exe" -i "C:\SensorNetworks\WavFiles\MarineRecordings\JascoGBR\AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56.wav" -sample_fmt s16 "C:\SensorNetworks\Output\OctaveFreqScale\JascoeMarineGBR116bit.wav"
/// ffmpeg binaries are in C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg
/// The method draws an annotated octave-scale spectrogram, writes the grid line locations of the scale
/// to a file and compares that file with the stored expected results file.
/// </summary>
public static void TESTMETHOD_OctaveFrequencyScale2()
{
    var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit.wav";
    var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo();
    var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo();
    var outputImagePath = Path.Combine(outputDir.FullName, "JascoMarineGBR1.png");
    var opFileStem = "JascoMarineGBR1";

    var recording = new AudioRecording(recordingPath);
    var scaleType = FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
    var freqScale = new FrequencyScale(scaleType);

    // no noise reduction when the sonogram is constructed; it is done explicitly below
    var config = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new AmplitudeSonogram(config, recording.WavReader);
    sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // DO NOISE REDUCTION
    sonogram.Data = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var title = "SPECTROGRAM: " + scaleType.ToString();
    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), title, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // DO FILE EQUALITY TEST
    string testName = "test2";
    var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyOctaveScaleTest2.EXPECTED.json"));
    var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyOctaveScaleTest2Results.json"));
    Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations);
    TestTools.FileEqualityTest(testName, resultFile, expectedTestFile);

    LoggedConsole.WriteLine("Completed Octave Frequency Scale " + testName);
    Console.WriteLine("\n\n");
}
/// <summary>
/// Produces four spectrogram images stacked vertically, for test/comparison purposes.
/// The first image is the spectrogram after modal noise reduction only; the remaining three apply
/// neighbourhood background-noise removal to that result with decibel thresholds of 0, 3 and 10.
/// </summary>
/// <param name="deciBelSpectrogram">the noisy decibel spectrogram</param>
/// <param name="xAxisInterval">x-axis tic interval</param>
/// <param name="stepDuration">the x-axis time scale</param>
/// <param name="nyquist">max freq value</param>
/// <param name="hzInterval">y-axis frequency scale</param>
/// <returns>Image containing the four spectrograms, one above the other</returns>
public static Image ModalNoiseRemovalAndGetSonograms(
    double[,] deciBelSpectrogram,
    TimeSpan xAxisInterval,
    TimeSpan stepDuration,
    int nyquist,
    int hzInterval)
{
    // Number of SDs above the mode used for noise removal.
    // Negative because, when sdCount >= zero, noise removal is a bit severe for environmental recordings.
    const double sdCount = -0.5;

    // Spectral dB thresholds for smoothing the background, and the title of the image each one produces.
    double[] neighbourhoodThresholds = { 0.0, 3.0, 10.0 };
    string[] neighbourhoodTitles = { "title2", "title3", "title4" };

    var panels = new Image[4];

    // Panel 0: modal noise reduction only.
    var modalResult = SNR.NoiseReduce(deciBelSpectrogram, NoiseReductionType.Modal, sdCount);
    var modalReduced = modalResult.Item1;
    panels[0] = DrawSonogram(modalReduced, xAxisInterval, stepDuration, nyquist, hzInterval, "title1");

    // Panels 1-3: each threshold is applied to the modal result, not cumulatively.
    for (int k = 0; k < neighbourhoodThresholds.Length; k++)
    {
        double[,] smoothed = SNR.RemoveNeighbourhoodBackgroundNoise(modalReduced, neighbourhoodThresholds[k]);
        panels[k + 1] = DrawSonogram(smoothed, xAxisInterval, stepDuration, nyquist, hzInterval, neighbourhoodTitles[k]);
    }

    return ImageTools.CombineImagesVertically(panels);
}
/// <summary>
/// Median noise reduction.
/// Calculates a median noise profile for the matrix, smooths its thresholds with a
/// moving-average filter of width 7, and truncates the background noise from the matrix using the smoothed profile.
/// </summary>
/// <param name="matrix">the matrix (spectrogram) to be noise reduced</param>
/// <returns>the result of truncating the smoothed noise profile from the passed matrix</returns>
public static double[,] NoiseReduction(double[,] matrix)
{
    // One threshold per column of the profile, smoothed across neighbouring entries.
    double[] smoothedThresholds = DataTools.filterMovingAverage(
        NoiseProfile.CalculateMedianNoiseProfile(matrix).NoiseThresholds,
        width: 7);

    return SNR.TruncateBgNoiseFromSpectrogram(matrix, smoothedThresholds);
}
/// <summary>
/// Extracts the amplitude spectrogram of a recording using non-overlapping frames and returns it
/// after standard noise reduction with a smoothed modal noise profile.
/// </summary>
/// <param name="recording">the recording whose samples, sample rate and epsilon are used</param>
/// <param name="frameSize">frame size; the frame step is set equal to it, i.e. no frame overlap</param>
/// <returns>the noise reduced amplitude spectrogram</returns>
public static double[,] GetAmplitudeSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
{
    const double sdCount = 0.1;

    // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
    const double spectralBgThreshold = 0.003;

    // Frame step == frame size.
    var dspResults = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples,
        recording.SampleRate,
        recording.Epsilon,
        frameSize,
        frameSize);
    var rawSpectrogram = dspResults.AmplitudeSpectrogram;

    // Modal noise profile of the full amplitude spectrogram.
    // NOTE(review): the original comment here says the profile calculation "assumes a dB spectrogram",
    // yet an amplitude spectrogram is passed - confirm this is intended.
    var noiseProfile = NoiseProfile.CalculateModalNoiseProfile(rawSpectrogram, sdCount);

    // Smooth the noise profile with a moving average of width 7.
    double[] smoothedThresholds = DataTools.filterMovingAverage(noiseProfile.NoiseThresholds, 7);

    return SNR.NoiseReduce_Standard(rawSpectrogram, smoothedThresholds, spectralBgThreshold);
}
/// <summary>
/// Initializes a new instance of the <see cref="DecibelSpectrogram"/> class
/// from an amplitude spectrogram: copies its configuration and attributes, converts the
/// amplitude values to decibels and applies the configured noise reduction.
/// </summary>
/// <param name="amplitudeSpectrogram">the amplitude spectrogram to convert</param>
public DecibelSpectrogram(AmplitudeSpectrogram amplitudeSpectrogram)
{
    // (i) COPY CONFIGURATION AND ATTRIBUTES
    var sourceConfig = amplitudeSpectrogram.Configuration;
    var sourceAttributes = amplitudeSpectrogram.Attributes;
    this.Configuration = sourceConfig;
    this.Attributes = sourceAttributes;

    // (ii) CONVERT AMPLITUDES TO DECIBELS
    this.Data = MFCCStuff.DecibelSpectra(
        amplitudeSpectrogram.Data,
        sourceAttributes.WindowPower,
        sourceAttributes.SampleRate,
        sourceAttributes.Epsilon);

    // (iii) NOISE REDUCTION
    // Item1 is the noise reduced data matrix; Item2 is the full bandwidth modal noise profile.
    var noiseReduced = SNR.NoiseReduce(
        this.Data,
        sourceConfig.NoiseReductionType,
        sourceConfig.NoiseReductionParameter);
    this.Data = noiseReduced.Item1;

    // The profile is only stored when an SnrData object already exists.
    if (this.SnrData != null)
    {
        this.SnrData.ModalNoiseProfile = noiseReduced.Item2;
    }
}
/// <summary>
/// Checks AcousticEvent.GetEventsAroundMaxima on the fixture sonogram:
/// ten events are expected in the 800-8000 Hz band, and the rectangles of five of them are verified.
/// </summary>
public void TestGetEventsAroundMaxima()
{
    // Search parameters.
    string species = "Pteropus species";
    int bandBottomHz = 800;
    int bandTopHz = 8000;
    double dbThreshold = 9.0;
    var shortestEvent = TimeSpan.FromSeconds(0.15);
    var longestEvent = TimeSpan.FromSeconds(0.8);
    TimeSpan startOffset = TimeSpan.Zero;

    var spectrogram = this.sonogram;
    var bandIntensity = SNR.CalculateFreqBandAvIntensity(spectrogram.Data, bandBottomHz, bandTopHz, spectrogram.NyquistFrequency);

    // Prepare plots. These are built as in the analysis code but are not used by the assertions below.
    double normalisationMax = 3 * dbThreshold;
    var plotThreshold = dbThreshold / normalisationMax;
    var normalisedIntensity = DataTools.NormaliseInZeroOne(bandIntensity, 0, normalisationMax);
    var intensityPlot = new Plot(species + " Territory", normalisedIntensity, plotThreshold);
    var plotList = new List<Plot> { intensityPlot };

    // Convert decibel scores to acoustic events.
    var events = AcousticEvent.GetEventsAroundMaxima(
        bandIntensity,
        startOffset,
        bandBottomHz,
        bandTopHz,
        dbThreshold,
        shortestEvent,
        longestEvent,
        spectrogram.FramesPerSecond,
        spectrogram.FBinWidth);

    Assert.AreEqual(10, events.Count);

    // Spot-check a selection of the detected events.
    Assert.AreEqual(new Rectangle(19, 1751, 168, 27), events[0].GetEventAsRectangle());
    Assert.AreEqual(new Rectangle(19, 1840, 168, 10), events[2].GetEventAsRectangle());
    Assert.AreEqual(new Rectangle(19, 1961, 168, 31), events[5].GetEventAsRectangle());
    Assert.AreEqual(new Rectangle(19, 2294, 168, 17), events[7].GetEventAsRectangle());
    Assert.AreEqual(new Rectangle(19, 2504, 168, 7), events[9].GetEventAsRectangle());
}
/// <summary>
/// Runs the default PCA whitening pipeline on a test recording (amplitude spectrogram, RMS normalization,
/// conversion to a dB spectrogram, noise reduction, whitening) and checks that the reverted spectrogram
/// returned by the whitening has the same dimensions as the matrix that was whitened.
/// </summary>
public void PcaWhiteningDefault()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");

    var fst = FreqScaleType.Linear;
    var freqScale = new FrequencyScale(fst);
    var recording = new AudioRecording(recordingPath);

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // GENERATE AMPLITUDE SPECTROGRAM
    var spectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    spectrogram.Configuration.WindowSize = freqScale.WindowSize;

    // DO RMS NORMALIZATION
    spectrogram.Data = SNR.RmsNormalization(spectrogram.Data);

    // CONVERT NORMALIZED AMPLITUDE SPECTROGRAM TO dB SPECTROGRAM
    var sonogram = new SpectrogramStandard(spectrogram);

    // DO NOISE REDUCTION
    var dataMatrix = PcaWhitening.NoiseReduction(sonogram.Data);
    sonogram.Data = dataMatrix;

    // DO PCA WHITENING
    var whitenedSpectrogram = PcaWhitening.Whitening(sonogram.Data);

    // DO UNIT TESTING
    // Check that the dimensions of the reverted spectrogram (second output of the pca whitening) equal those of
    // the input matrix. Assert.AreEqual takes (expected, actual): the input matrix dimensions are the expected
    // values, so that a failure message reports the two numbers the right way round.
    Assert.AreEqual(sonogram.Data.GetLength(0), whitenedSpectrogram.Reversion.GetLength(0));
    Assert.AreEqual(sonogram.Data.GetLength(1), whitenedSpectrogram.Reversion.GetLength(1));
}
} // LocalPeaks()

/// <summary>
/// CALCULATEs SPECTRAL PEAK TRACKS: spectralIndices.SPT, RHZ, RVT, RPS, RNG
/// This method is only called from IndexCalulate.analysis() when the IndexCalculation Duration is less than 10 seconds,
/// because the background noise etc. must then be recalculated.
/// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
/// A sub-segment of the recording (with a buffer of two frames) is converted to a decibel spectrogram,
/// noise reduced, and passed to the constructor, which requires a noise reduced decibel spectrogram.
/// </summary>
/// <param name="recording">the full recording; its epsilon and sample rate are used for the decibel conversion</param>
/// <param name="sampleStart">first sample of the sub-segment</param>
/// <param name="sampleEnd">last sample of the sub-segment</param>
/// <param name="frameSize">frame size; the frame step is set equal to it</param>
/// <param name="octaveScale">true to use the Linear125Octaves7Tones28Nyquist32000 frequency scale, false for the linear decibel conversion</param>
/// <param name="peakThreshold">threshold passed to the SpectralPeakTracks constructor</param>
/// <returns>the spectral peak track information for the sub-segment</returns>
public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
{
    // 2 because must allow for edge effects when using 5x5 grid to find ridges.
    const int edgeFrameCount = 2;

    // SPECTRAL dB THRESHOLD for smoothing background
    const double neighbourhoodDbThreshold = 2.0;

    double eps = recording.Epsilon;
    int sr = recording.WavReader.SampleRate;

    // Extract the sub-segment plus buffer, then frame it without overlap (step == frame size).
    int bufferSamples = frameSize * edgeFrameCount;
    var subsegment = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, bufferSamples);
    var dsp = DSP_Frames.ExtractEnvelopeAndFfts(subsegment, frameSize, frameSize);

    // Generate the SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram.
    double[,] rawDecibels;
    if (!octaveScale)
    {
        rawDecibels = MFCCStuff.DecibelSpectra(dsp.AmplitudeSpectrogram, dsp.WindowPower, sr, eps);
    }
    else
    {
        var octaveFreqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
        rawDecibels = OctaveFreqScale.DecibelSpectra(dsp.AmplitudeSpectrogram, dsp.WindowPower, sr, eps, octaveFreqScale);
    }

    // Calculate the noise profile, truncate it from the spectrogram, then smooth the remaining background.
    var backgroundNoise = NoiseProfile.CalculateBackgroundNoise(rawDecibels);
    double[,] truncated = SNR.TruncateBgNoiseFromSpectrogram(rawDecibels, backgroundNoise);
    double[,] noiseReduced = SNR.RemoveNeighbourhoodBackgroundNoise(truncated, neighbourhoodDbThreshold);

    return new SpectralPeakTracks(noiseReduced, peakThreshold);
}
/// <summary>
/// This method normalizes each score array by subtracting the mode rather than the average of the array,
/// dividing by the noise standard deviation, clamping the resulting z-score to [0, 4] and rescaling it to [0, 1].
/// This is because the noise is often not normally distributed but rather skewed.
/// However, did not work well.
/// NOTE: the data array of each passed plot is overwritten in place; the returned list holds the same plot objects.
/// </summary>
/// <param name="plots">the plots whose data arrays are to be normalized</param>
/// <returns>a new list containing the passed plots, in the same order, with normalized data</returns>
public static List <Plot> SubtractModeAndSd(List <Plot> plots)
{
    // Full scale of the normalized score corresponds to this many SDs above the mode.
    const double maxZScore = 4.0;

    var opPlots = new List <Plot>(plots.Count);

    foreach (var plot in plots)
    {
        var scores = plot.data;

        // Mode and SD of the background noise in this score array.
        var bgn = SNR.CalculateModalBackgroundNoiseInSignal(scores, 1.0);
        var mode = bgn.NoiseMode;
        var sd = bgn.NoiseSd;

        for (int i = 0; i < scores.Length; i++)
        {
            // Convert score to a z-score relative to the mode.
            double zScore = (scores[i] - mode) / sd;

            if (double.IsNaN(zScore))
            {
                // A zero SD gives 0/0 for any score equal to the mode. NaN passes both clamp tests below,
                // so without this guard it would be written into the plot data. Treat it as "no signal".
                zScore = 0.0;
            }
            else if (zScore < 0.0)
            {
                zScore = 0.0;
            }
            else if (zScore > maxZScore)
            {
                zScore = maxZScore;
            }

            // normalize full scale to 4 SDs.
            scores[i] = zScore / maxZScore;
        }

        opPlots.Add(plot);
    }

    return opPlots;
}
}//end CONSTRUCTOR

/// <summary>
/// Makes the spectrogram data from an amplitude matrix: optionally converts to the full band width mel scale,
/// converts the amplitudes to decibels and applies the configured noise reduction.
/// The noise reduced matrix is stored in Data.
/// </summary>
/// <param name="amplitudeM">the amplitude spectrogram matrix</param>
public override void Make(double[,] amplitudeM)
{
    var config = this.Configuration;

    // (i) IF REQUIRED CONVERT TO FULL BAND WIDTH MEL SCALE (using the Greg integral)
    // Make sure you have Configuration.MelBinCount somewhere
    double[,] spectra = config.DoMelScale
        ? MFCCStuff.MelFilterBank(amplitudeM, config.MelBinCount, this.NyquistFrequency, 0, this.NyquistFrequency)
        : amplitudeM;

    // (ii) CONVERT AMPLITUDES TO DECIBELS
    double[,] decibels = MFCCStuff.DecibelSpectra(spectra, config.WindowPower, this.SampleRate, config.epsilon);

    // (iii) NOISE REDUCTION
    // Item1 is the noise reduced data matrix; Item2 is the full bandwidth modal noise profile.
    var noiseReduced = SNR.NoiseReduce(decibels, config.NoiseReductionType, config.NoiseReductionParameter);
    this.Data = noiseReduced.Item1;

    // The profile is only stored when an SnrData object already exists.
    if (this.SnrData != null)
    {
        this.SnrData.ModalNoiseProfile = noiseReduced.Item2;
    }
}
/// <summary>
/// The CORE ANALYSIS METHOD.
/// Makes a standard noise-reduced spectrogram of the audio segment, searches a band of 64 frequency bins
/// starting just above MIN_HZ for harmonic structure, scores each frame by its harmonic intensity and
/// converts the scored frames into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">the audio segment file to analyse</param>
/// <param name="configDict">
/// Configuration values. Must contain the keys MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, DECIBEL_THRESHOLD,
/// INTENSITY_THRESHOLD and CALL_DURATION. The frame length key is optional (default 1024).
/// Numeric values are parsed using the invariant culture, i.e. '.' is the decimal separator.
/// </param>
/// <param name="segmentStartOffset">start offset of the segment, passed to every created event</param>
/// <returns>
/// A tuple of: the sonogram, the hits matrix (same dimensions as the sonogram data), a plot of the harmonic
/// intensity scores, the list of predicted events and the duration of the recording.
/// </returns>
public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
{
    // Config values are machine-readable text. Parse them independently of the current culture so that
    // e.g. "0.5" is not misread on a machine whose culture uses ',' as the decimal separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Set default values; the frame length can be overridden by the config.
    int frameLength = 1024;
    if (configDict.TryGetValue(AnalysisKeys.FrameLength, out string frameLengthText))
    {
        frameLength = int.Parse(frameLengthText, invariant);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"], invariant);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"], invariant);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"], invariant);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"], invariant); // dB
    double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); // in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // Equal to the bin width because the window overlap is zero.
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    // Declared as BaseSonogram so that Tuple.Create infers the declared return type.
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    //ii: DETECT HARMONICS
    // Item1 (a decibel array) is not needed here; Item2 holds the intensity scores and Item3 the periodicity.
    var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);
    double[] intensity = results.Item2;
    double[] periodicity = results.Item3;

    int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < harmonicIntensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        // Find the frequency having max power in this frame, ignoring the noisy lowest bins.
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < noiseBound; j++)
        {
            spectrum[j] = 0.0;
        }

        int maxIndex = DataTools.GetMaxIndex(spectrum);
        int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);

        // Frames whose dominant frequency lies below 1200 Hz are given a zero score.
        // (The original note here reads: expect humans to have max < 1000 Hz.)
        double discount = 1.0;
        if (freqWithMaxPower < 1200)
        {
            discount = 0.0;
        }

        // Strictly greater: a frame whose intensity equals the threshold keeps a score of zero.
        if (intensity[r] > harmonicIntensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        // The relative period is the same for every bin of the frame, so compute it once.
        double herzPeriod = periodicity[r] * freqBinWidth;
        double relativePeriod = (herzPeriod - minFormantgap) / maxFormantgap;
        for (int c = minBin; c < maxbin; c++)
        {
            hits[r, c] = relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    // Scores are normalised against this fixed maximum.
    double maxPossibleScore = 0.5;
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List <AcousticEvent>();
    for (int i = 0; i < rowCount; i++)
    {
        //assume one score position per crow call
        if (scoreArray[i] < 0.001)
        {
            continue;
        }

        // The event is centred on the scored frame.
        // NOTE(review): this is negative for frames nearer the start than half a call span - confirm acceptable.
        double startTime = (i - halfCallSpan) / framesPerSecond;
        AcousticEvent ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        ev.Score = scoreArray[i];
        ev.ScoreNormalised = ev.Score / maxPossibleScore;
        predictedEvents.Add(ev);
    }

    Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// Calculates six spectral indices (OSC, ACI, ENT, BGN, PMN and EVN) for a recording segment
/// and returns them in a SpectralIndexValuesForContentDescription object.
/// Frame size is taken from ContentSignatures.FrameSize and frames do not overlap.
/// </summary>
/// <param name="recording">the recording segment to analyse</param>
/// <param name="segmentOffsetTimeSpan">not read by the active code in this method; it appears only in commented-out code</param>
/// <param name="sampleRateOfOriginalAudioFile">sample rate of the original audio file, used to detect up-sampling</param>
/// <param name="returnSonogramInfo">not read by the active code in this method</param>
/// <returns>the spectral index values</returns>
//////public static IndexCalculateResult Analysis(
public static SpectralIndexValuesForContentDescription Analysis(
    AudioRecording recording,
    TimeSpan segmentOffsetTimeSpan,
    int sampleRateOfOriginalAudioFile,
    bool returnSonogramInfo = false)
{
    // returnSonogramInfo = true; // if debugging
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    //var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    // NOTE: indexCalculationDuration is only referenced by the commented-out result construction below.
    var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds);

    // Get FRAME parameters for the calculation of Acoustic Indices
    int frameSize = ContentSignatures.FrameSize;
    int frameStep = frameSize; // that is, windowOverlap = zero
    double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    // INITIALISE a RESULTS STRUCTURE TO return
    // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
    var config = new IndexCalculateConfig(); // sets some default values

    // NOTE: freqBinCount and indexProperties are only referenced by the commented-out result construction below.
    int freqBinCount = frameSize / 2;
    var indexProperties = GetIndexProperties();
    ////////var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, segmentOffsetTimeSpan, config);
    var spectralIndices = new SpectralIndexValuesForContentDescription();

    ///////result.SummaryIndexValues = null;
    ///////SpectralIndexValues spectralIndices = result.SpectralIndexValues;

    // set up default spectrogram to return
    ///////result.Sg = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null;
    ///////result.Hits = null;
    ///////result.TrackScores = new List<Plot>();

    // ################################## FINISHED SET-UP
    // ################################## NOW GET THE AMPLITUDE SPECTROGRAM

    // (A) EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
    // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
    var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

    // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
    // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

    // double length of the vector because want to work with 256 element vector for spectrogram purposes
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

    // i: IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
    // original sample rate can be anything 11.0-44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;

    // if up-sampling has been done, overwrite the nyquist values held in dspOutput1
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
    }

    // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

    // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
    double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int i = 0; i < temporalEntropySpectrum.Length; i++)
    {
        temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
    }

    spectralIndices.ENT = temporalEntropySpectrum;

    // (D) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

    // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    spectralIndices.BGN = spectralDecibelBgn;

    // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
    // REUSE the var decibelSpectrogram but this time using dspOutput1.
    // NOTE(review): this call has exactly the same arguments as the conversion in step i above.
    // It looks redundant unless CalculateBackgroundNoise modifies its argument - confirm before removing.
    decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

    // ######################################################################################################################################################
    // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
    // FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
    // dB THRESHOLD for calculating spectral coverage
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

    // Calculate lower and upper boundary bin ids.
    // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic noise. These biased index values away from bio-phony.
    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
    var spActivity = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

    // Only the event spectrum is kept; the cover spectrum is not stored.
    //spectralIndices.CVR = spActivity.CoverSpectrum;
    spectralIndices.EVN = spActivity.EventSpectrum;

    ///////result.TrackScores = null;
    ///////return result;
    return(spectralIndices);
} // end calculation of Six Spectral Indices