/// <summary>
/// Gets the amplitude spectrogram matrix for the passed recording segment.
/// The frame step is set equal to the frame length, i.e. zero overlap between frames.
/// No noise removal is done here.
/// NOTE (AT): the spectrogram is extracted via DSP_Frames.ExtractEnvelopeAndFfts rather than
/// via a BaseSonogram subclass because BaseSonogram does not allow small spectrograms
/// (less than 0.2s) to be calculated.
/// </summary>
/// <param name="recordingSegment">The audio segment to transform.</param>
/// <param name="frameLength">FFT window size; also used as the frame step.</param>
/// <returns>The amplitude spectrogram.</returns>
public static double[,] GetSpectrogramMatrix(AudioRecording recordingSegment, int frameLength)
{
    var fft = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameLength, frameLength);
    return fft.AmplitudeSpectrogram;
}
/// <summary>
/// Produces a decibel spectrogram with background noise removed.
/// The frame step equals the frame size, i.e. zero overlap between frames.
/// </summary>
/// <param name="recording">The recording to transform.</param>
/// <param name="frameSize">FFT window size; also used as the frame step.</param>
/// <returns>The noise-reduced decibel spectrogram.</returns>
public static double[,] GetDecibelSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
{
    // zero overlap: the step equals the window size
    var dspOutput = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples,
        recording.SampleRate,
        recording.Epsilon,
        frameSize,
        frameSize);

    // convert the amplitude spectrogram to decibels
    var dbSpectrogram = MFCCStuff.DecibelSpectra(
        dspOutput.AmplitudeSpectrogram,
        dspOutput.WindowPower,
        recording.SampleRate,
        recording.Epsilon);

    // subtract the background noise profile, then clean residual neighbourhood noise
    double[] backgroundProfile = NoiseProfile.CalculateBackgroundNoise(dbSpectrogram);
    dbSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(dbSpectrogram, backgroundProfile);
    dbSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(dbSpectrogram, nhThreshold: 3.0);
    return dbSpectrogram;
}
/// <summary>
/// Produces an amplitude spectrogram with background noise removed.
/// The frame step equals the frame size, i.e. zero overlap between frames.
/// </summary>
/// <param name="recording">The recording to transform.</param>
/// <param name="frameSize">FFT window size; also used as the frame step.</param>
/// <returns>The noise-reduced amplitude spectrogram.</returns>
public static double[,] GetAmplitudeSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
{
    // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
    const double spectralBgThreshold = 0.003;
    const double sdCount = 0.1;

    // zero overlap: the step equals the window size
    var dspOutput = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples,
        recording.SampleRate,
        recording.Epsilon,
        frameSize,
        frameSize);

    // calculate the modal noise profile (NOTE: the helper assumes a dB spectrogram)
    // and smooth it with a 7-point moving average.
    var noiseProfile = NoiseProfile.CalculateModalNoiseProfile(dspOutput.AmplitudeSpectrogram, sdCount);
    double[] smoothedProfile = DataTools.filterMovingAverage(noiseProfile.NoiseThresholds, 7);

    // subtract the smoothed profile from the full amplitude spectrogram
    return SNR.NoiseReduce_Standard(dspOutput.AmplitudeSpectrogram, smoothedProfile, spectralBgThreshold);
}
/// <summary>
/// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class.
/// Returns early (leaving this.Data unset) if the recording is shorter than one second.
/// </summary>
/// <param name="config">Spectrogram settings: window size, overlap, window function and mel options.</param>
/// <param name="wav">The wav signal from which the spectrogram is derived.</param>
public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
{
    this.Configuration = config;
    this.Attributes = new SpectrogramAttributes();

    double minDuration = 1.0;
    if (wav.Time.TotalSeconds < minDuration)
    {
        LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
        return;
    }

    //set attributes for the current recording and spectrogram type
    this.Attributes.SampleRate = wav.SampleRate;

    // BUG FIX: Duration was previously assigned twice; duplicate assignment removed.
    this.Attributes.Duration = wav.Time;
    this.Attributes.NyquistFrequency = wav.SampleRate / 2;
    this.Attributes.MaxAmplitude = wav.CalculateMaximumAmplitude();
    this.Attributes.FrameDuration = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate);

    var recording = new AudioRecording(wav);
    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        config.WindowSize,
        config.WindowOverlap,
        this.Configuration.WindowFunction);

    // now recover required data
    //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
    this.Attributes.Epsilon = fftdata.Epsilon;
    this.Attributes.WindowPower = fftdata.WindowPower;
    this.Attributes.FrameCount = fftdata.FrameCount;
    this.Data = fftdata.AmplitudeSpectrogram;

    // IF REQUIRED CONVERT TO MEL SCALE
    if (this.Configuration.DoMelScale)
    {
        // this mel scale conversion uses the "Greg integral" !
        this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency);
    }
}
/// <summary>
/// Calculates a similarity index between the two channels of a stereo recording.
/// Each channel is converted to an amplitude spectrogram (512 frame size, zero overlap)
/// and the index is the average, over all spectrogram cells, of (min/max)^2 of the
/// two cell values. Identical channels therefore yield 1.0.
/// </summary>
/// <param name="channelL">Signal samples of the left channel.</param>
/// <param name="channelR">Signal samples of the right channel.</param>
/// <param name="epsilon">Signal dependent minimum amplitude value.</param>
/// <param name="sampleRate">Sample rate of both channels.</param>
/// <returns>Mean per-cell similarity over the two spectrograms.</returns>
public static double SimilarityIndex2(double[] channelL, double[] channelR, double epsilon, int sampleRate)
{
    const int frameSize = 512;
    const int frameStep = 512;

    // floor prevents division by zero when both cells are (close to) silent.
    const double minAmplitude = 0.000001;

    var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
    var spgrmL = dspOutputL.AmplitudeSpectrogram;

    var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
    var spgrmR = dspOutputR.AmplitudeSpectrogram;

    // get spgrm dimensions - assume both spgrms have same dimensions
    int rowCount = spgrmL.GetLength(0);
    int colCount = spgrmL.GetLength(1);

    double similarityIndex = 0;
    for (int r = 0; r < rowCount; r++)
    {
        for (int c = 0; c < colCount; c++)
        {
            double min = Math.Min(spgrmL[r, c], spgrmR[r, c]);
            double max = Math.Max(spgrmL[r, c], spgrmR[r, c]);

            double index;
            if (max <= minAmplitude)
            {
                // both values effectively zero - rate similarity against the floor instead
                index = min / minAmplitude;
            }
            else
            {
                index = min * min / (max * max);
            }

            similarityIndex += index;
        }
    }

    return similarityIndex / (rowCount * colCount);
}
/// <summary>
/// Tests the standard spectral background-noise calculation used for the BGN spectral index.
/// Extracts a decibel spectrogram from a known recording and compares the calculated
/// noise profile against a previously serialized expected vector.
/// </summary>
public void TestStandardNoiseRemoval()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
    int windowSize = 512;
    var sr = recording.SampleRate;

    // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
    double windowOverlap = 0.0;
    var windowFunction = WindowFunctions.HAMMING.ToString();

    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        windowSize,
        windowOverlap,
        windowFunction);

    // Now recover the data
    // The following data is required when constructing sonograms
    //var duration = recording.WavReader.Time;
    //var frameCount = fftdata.FrameCount;
    //var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;

    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(fftdata.AmplitudeSpectrogram, fftdata.WindowPower, sr, fftdata.Epsilon);

    // The following call to NoiseProfile.CalculateBackgroundNoise(double[,] spectrogram)
    // returns a noise profile that is used as the BGN spectral index.
    // It calculates the modal background noise for each frequency bin and then returns a smoothed version.
    // By default, the number of SDs = 0 and the smoothing window = 7.
    // Method assumes that the passed spectrogram is oriented as: rows=frames, cols=freq bins.
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);

    var resourcesDir = PathHelper.ResolveAssetPath("Indices");
    var expectedSpectrumFile = new FileInfo(resourcesDir + "\\NoiseProfile.bin");

    // uncomment the next line to regenerate the expected-result fixture.
    //Binary.Serialize(expectedSpectrumFile, spectralDecibelBgn);

    var expectedVector = Binary.Deserialize <double[]>(expectedSpectrumFile);
    CollectionAssert.That.AreEqual(expectedVector, spectralDecibelBgn, 0.000_000_001);
}
} // LocalPeaks()

/// <summary>
/// Calculates SPECTRAL PEAK TRACKS: spectralIndices.SPT, RHZ, RVT, RPS, RNG.
/// This method is only called from IndexCalulate.analysis() when the IndexCalculation Duration is less than 10 seconds,
/// because need to recalculate background noise etc.
/// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
/// NOTE: We require a noise reduced decibel spectrogram.
/// FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth.
/// </summary>
/// <param name="recording">The full recording from which a subsegment is extracted.</param>
/// <param name="sampleStart">Start sample of the subsegment.</param>
/// <param name="sampleEnd">End sample of the subsegment.</param>
/// <param name="frameSize">FFT window size; also used as the frame step (zero overlap).</param>
/// <param name="octaveScale">If true, use the octave frequency scale to convert to decibels.</param>
/// <param name="peakThreshold">Peak threshold passed to the SpectralPeakTracks constructor.</param>
public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
{
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    // 2 because must allow for edge effects when using 5x5 grid to find ridges.
    int bufferFrameCount = 2;
    int ridgeBuffer = frameSize * bufferFrameCount;

    // pad the requested subsegment with the ridge buffer on either side
    var ridgeRecording = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
    int frameStep = frameSize;
    var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);

    // Generate the ridge SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram.
    double[,] decibelSpectrogram;
    if (octaveScale)
    {
        var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
        decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale);
    }
    else
    {
        decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);
    }

    // calculate the noise profile and subtract it from the spectrogram
    var spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);

    // SPECTRAL dB THRESHOLD for smoothing background
    double nhDecibelThreshold = 2.0;
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhDecibelThreshold); // thresholds in decibels

    // double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    // TimeSpan frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    var sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
    return(sptInfo);
}
/// <summary>
/// Asserts that the dominant energy in the given signal lies at (or near) the expected frequencies.
/// Any spectrum bin whose averaged energy exceeds 80% of the peak must fall within
/// <paramref name="variance"/> bins of one of the expected frequencies, otherwise the assert fails.
/// </summary>
/// <param name="wavReader">Provides the sample rate and epsilon of the signal.</param>
/// <param name="signal">The raw signal samples to analyse.</param>
/// <param name="frequencies">The frequencies (Hz) expected to dominate the signal.</param>
/// <param name="variance">Allowed deviation, in bins, around each expected frequency bin.</param>
public static void AssertFrequencyInSignal(WavReader wavReader, double[] signal, int[] frequencies, int variance = 1)
{
    // compute an amplitude spectrogram (512 window, zero overlap) and average it into one spectrum
    var fft = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(signal, wavReader.SampleRate, wavReader.Epsilon, 512, 0.0);
    var spectrum = SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(fft.AmplitudeSpectrogram);

    // any bin carrying more than 80% of the peak energy counts as "loud"
    double threshold = spectrum.Max() * 0.8;

    // convert the expected frequencies into spectrum bin indices
    var expectedBins = frequencies.Select(f => (int)(f / fft.FreqBinWidth)).ToArray();

    // every loud bin must lie within `variance` bins of some expected bin
    bool isOk = spectrum
        .Select((energy, bin) => (energy, bin))
        .Where(t => t.energy > threshold)
        .All(t => expectedBins.Any(b => t.bin >= b - variance && t.bin <= b + variance));

    BaseTest.Assert.IsTrue(isOk);
}
/// <summary>
/// Calculates the following spectrograms as per settings in the Images array in the config file: Towsey.SpectrogramGenerator.yml:
/// Waveform.
/// DecibelSpectrogram.
/// DecibelSpectrogramNoiseReduced.
/// CepstralSpectrogram.
/// DifferenceSpectrogram.
/// AmplitudeSpectrogramLocalContrastNormalization.
/// Experimental.
/// Comment the config.yml file with a hash, those spectrograms that are not required.
/// </summary>
/// <param name="sourceRecording">The name of the original recording.</param>
/// <param name="config">Contains parameter info to make spectrograms.</param>
/// <param name="sourceRecordingName">Name of source recording. Required only for spectrogram labels.</param>
public static AudioToSonogramResult GenerateSpectrogramImages(
    FileInfo sourceRecording,
    SpectrogramGeneratorConfig config,
    string sourceRecordingName)
{
    //int signalLength = recordingSegment.WavReader.GetChannel(0).Length;
    var recordingSegment = new AudioRecording(sourceRecording.FullName);
    int sampleRate = recordingSegment.WavReader.SampleRate;
    var result = new AudioToSonogramResult();

    // default to a single decibel spectrogram when the config does not request specific images
    var requestedImageTypes = config.Images ?? new[] { SpectrogramImageType.DecibelSpectrogram };
    var @do = requestedImageTypes.ToHashSet();

    int frameSize = config.GetIntOrNull("FrameLength") ?? 512;
    int frameStep = config.GetIntOrNull("FrameStep") ?? 441;

    // must calculate this because used later on.
    double frameOverlap = (frameSize - frameStep) / (double)frameSize;

    // Default noiseReductionType = Standard
    var bgNoiseThreshold = config.BgNoiseThreshold;

    // threshold for drawing the difference spectrogram
    var differenceThreshold = config.DifferenceThreshold;

    // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

    var sonoConfig = new SonogramConfig()
    {
        epsilon = recordingSegment.Epsilon,
        SampleRate = sampleRate,
        WindowSize = frameSize,
        WindowStep = frameStep,
        WindowOverlap = frameOverlap,
        WindowPower = dspOutput1.WindowPower,
        Duration = recordingSegment.Duration,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = bgNoiseThreshold,
    };

    var images = new Dictionary<SpectrogramImageType, Image<Rgb24>>(requestedImageTypes.Length);

    // IMAGE 1) draw the WAVEFORM
    if (@do.Contains(Waveform))
    {
        var minValues = dspOutput1.MinFrameValues;
        var maxValues = dspOutput1.MaxFrameValues;
        int height = config.WaveformHeight;
        var waveformImage = GetWaveformImage(minValues, maxValues, height);

        // add in the title bar and time scales.
        string title = $"WAVEFORM - {sourceRecordingName} (min value={dspOutput1.MinSignalValue:f3}, max value={dspOutput1.MaxSignalValue:f3})";
        var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
            title,
            waveformImage.Width,
            ImageTags[Waveform]);
        var startTime = TimeSpan.Zero;
        var xAxisTicInterval = TimeSpan.FromSeconds(1);
        TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(frameStep / (double)sampleRate);
        var labelInterval = TimeSpan.FromSeconds(5);
        waveformImage = BaseSonogram.FrameSonogram(
            waveformImage,
            titleBar,
            startTime,
            xAxisTicInterval,
            xAxisPixelDuration,
            labelInterval);
        images.Add(Waveform, waveformImage);
    }

    // Draw various decibel spectrograms
    var decibelTypes = new[] { SpectrogramImageType.DecibelSpectrogram, DecibelSpectrogramNoiseReduced, DifferenceSpectrogram, Experimental };
    if (@do.Overlaps(decibelTypes))
    {
        // disable noise removal for first two spectrograms
        var disabledNoiseReductionType = sonoConfig.NoiseReductionType;
        sonoConfig.NoiseReductionType = NoiseReductionType.None;

        //Get the decibel spectrogram
        var decibelSpectrogram = new SpectrogramStandard(sonoConfig, dspOutput1.AmplitudeSpectrogram);
        result.DecibelSpectrogram = decibelSpectrogram;

        // keep a pristine copy of the data before noise reduction mutates decibelSpectrogram.Data below
        double[,] dbSpectrogramData = (double[,])decibelSpectrogram.Data.Clone();

        // IMAGE 2) Display the DecibelSpectrogram
        if (@do.Contains(SpectrogramImageType.DecibelSpectrogram))
        {
            images.Add(
                SpectrogramImageType.DecibelSpectrogram,
                decibelSpectrogram.GetImageFullyAnnotated(
                    $"DECIBEL SPECTROGRAM ({sourceRecordingName})",
                    ImageTags[SpectrogramImageType.DecibelSpectrogram]));
        }

        // NOTE(review): this condition lists CepstralSpectrogram where the sibling overload
        // (AnalyzerConfig version) tests doDifferenceSpectrogram instead - confirm which is intended.
        if (@do.Overlaps(new[] { DecibelSpectrogramNoiseReduced, Experimental, CepstralSpectrogram }))
        {
            sonoConfig.NoiseReductionType = disabledNoiseReductionType;
            sonoConfig.NoiseReductionParameter = bgNoiseThreshold;
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram.Data);
            decibelSpectrogram.Data = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram.Data, spectralDecibelBgn);
            decibelSpectrogram.Data = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram.Data, nhThreshold: bgNoiseThreshold);

            // IMAGE 3) DecibelSpectrogram - noise reduced
            if (@do.Contains(DecibelSpectrogramNoiseReduced))
            {
                images.Add(
                    DecibelSpectrogramNoiseReduced,
                    decibelSpectrogram.GetImageFullyAnnotated(
                        $"DECIBEL SPECTROGRAM + Lamel noise subtraction. ({sourceRecordingName})",
                        ImageTags[DecibelSpectrogramNoiseReduced]));
            }

            // IMAGE 4) EXPERIMENTAL Spectrogram
            if (@do.Contains(Experimental))
            {
                sonoConfig.NoiseReductionType = disabledNoiseReductionType;
                images.Add(
                    Experimental,
                    GetDecibelSpectrogram_Ridges(
                        dbSpectrogramData,
                        decibelSpectrogram,
                        sourceRecordingName));
            }
        }

        // IMAGE 5) draw difference spectrogram. This is derived from the original decibel spectrogram
        if (@do.Contains(DifferenceSpectrogram))
        {
            //var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
            var differenceImage = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
            differenceImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
                differenceImage,
                sampleRate,
                frameStep,
                $"DECIBEL DIFFERENCE SPECTROGRAM ({sourceRecordingName})",
                ImageTags[DifferenceSpectrogram]);
            images.Add(DifferenceSpectrogram, differenceImage);
        }
    }

    // IMAGE 6) Cepstral Spectrogram
    if (@do.Contains(CepstralSpectrogram))
    {
        images.Add(
            CepstralSpectrogram,
            GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
    }

    // IMAGE 7) AmplitudeSpectrogram_LocalContrastNormalization
    if (@do.Contains(AmplitudeSpectrogramLocalContrastNormalization))
    {
        var neighborhoodSeconds = config.NeighborhoodSeconds;
        var lcnContrastParameter = config.LcnContrastLevel;
        images.Add(
            AmplitudeSpectrogramLocalContrastNormalization,
            GetLcnSpectrogram(
                sonoConfig,
                recordingSegment,
                sourceRecordingName,
                neighborhoodSeconds,
                lcnContrastParameter));
    }

    // now pick and combine images in order user specified
    var sortedImages = requestedImageTypes.Select(x => images[x]);

    // COMBINE THE SPECTROGRAM IMAGES
    result.CompositeImage = ImageTools.CombineImagesVertically(sortedImages.ToArray());
    return(result);
}
/// <summary>
/// Calculates the following spectrograms as per content of config.yml file:
/// Waveform: true.
/// DifferenceSpectrogram: true.
/// DecibelSpectrogram: true.
/// DecibelSpectrogram_NoiseReduced: true.
/// DecibelSpectrogram_Ridges: true.
/// AmplitudeSpectrogram_LocalContrastNormalization: true.
/// SoxSpectrogram: false.
/// Experimental: true.
/// </summary>
/// <param name="sourceRecording">The name of the original recording.</param>
/// <param name="configInfo">Contains parameter info to make spectrograms.</param>
/// <param name="sourceRecordingName">Name of source recording. Required only for spectrogram labels.</param>
public static AudioToSonogramResult GenerateSpectrogramImages(
    FileInfo sourceRecording,
    AnalyzerConfig configInfo,
    string sourceRecordingName)
{
    //int signalLength = recordingSegment.WavReader.GetChannel(0).Length;
    var recordingSegment = new AudioRecording(sourceRecording.FullName);
    int sampleRate = recordingSegment.WavReader.SampleRate;
    var result = new AudioToSonogramResult();

    // init the image stack
    var list = new List<Image>();

    // read which images are requested; note DecibelSpectrogram_NoiseReduced defaults to true
    bool doWaveForm = configInfo.GetBoolOrNull("Waveform") ?? false;
    bool doDecibelSpectrogram = configInfo.GetBoolOrNull("DecibelSpectrogram") ?? false;
    bool doNoiseReducedSpectrogram = configInfo.GetBoolOrNull("DecibelSpectrogram_NoiseReduced") ?? true;
    bool doDifferenceSpectrogram = configInfo.GetBoolOrNull("DifferenceSpectrogram") ?? false;
    bool doLcnSpectrogram = configInfo.GetBoolOrNull("AmplitudeSpectrogram_LocalContrastNormalization") ?? false;
    bool doCepstralSpectrogram = configInfo.GetBoolOrNull("CepstralSpectrogram") ?? false;
    bool doExperimentalSpectrogram = configInfo.GetBoolOrNull("Experimental") ?? false;

    //Don't do SOX spectrogram.
    //bool doSoxSpectrogram = configInfo.GetBool("SoxSpectrogram");

    int frameSize = configInfo.GetIntOrNull("FrameLength") ?? 512;
    int frameStep = configInfo.GetIntOrNull("FrameStep") ?? 0;

    // must calculate this because used later on.
    double frameOverlap = (frameSize - frameStep) / (double)frameSize;

    // Default noiseReductionType = Standard
    var bgNoiseThreshold = configInfo.GetDoubleOrNull("BgNoiseThreshold") ?? 3.0;

    // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

    var sonoConfig = new SonogramConfig()
    {
        epsilon = recordingSegment.Epsilon,
        SampleRate = sampleRate,
        WindowSize = frameSize,
        WindowStep = frameStep,
        WindowOverlap = frameOverlap,
        WindowPower = dspOutput1.WindowPower,
        Duration = recordingSegment.Duration,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = bgNoiseThreshold,
    };

    // IMAGE 1) draw the WAVEFORM
    if (doWaveForm)
    {
        var minValues = dspOutput1.MinFrameValues;
        var maxValues = dspOutput1.MaxFrameValues;
        int height = configInfo.GetIntOrNull("WaveformHeight") ?? 180;
        var waveformImage = GetWaveformImage(minValues, maxValues, height);

        // add in the title bar and time scales.
        string title = $"WAVEFORM - {sourceRecordingName} (min value={dspOutput1.MinSignalValue:f3}, max value={dspOutput1.MaxSignalValue:f3})";
        var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(title, waveformImage.Width);
        var startTime = TimeSpan.Zero;
        var xAxisTicInterval = TimeSpan.FromSeconds(1);
        TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(frameStep / (double)sampleRate);
        var labelInterval = TimeSpan.FromSeconds(5);
        waveformImage = BaseSonogram.FrameSonogram(waveformImage, titleBar, startTime, xAxisTicInterval, xAxisPixelDuration, labelInterval);
        list.Add(waveformImage);
    }

    // Draw various decibel spectrograms
    if (doDecibelSpectrogram || doNoiseReducedSpectrogram || doDifferenceSpectrogram || doExperimentalSpectrogram)
    {
        // disable noise removal for first spectrogram
        var disabledNoiseReductionType = sonoConfig.NoiseReductionType;
        sonoConfig.NoiseReductionType = NoiseReductionType.None;

        //Get the decibel spectrogram
        var decibelSpectrogram = new SpectrogramStandard(sonoConfig, dspOutput1.AmplitudeSpectrogram);
        result.DecibelSpectrogram = decibelSpectrogram;

        // keep a pristine copy of the data before noise reduction mutates decibelSpectrogram.Data below
        double[,] dbSpectrogramData = (double[,])decibelSpectrogram.Data.Clone();

        // IMAGE 2) DecibelSpectrogram
        if (doDecibelSpectrogram)
        {
            var image3 = decibelSpectrogram.GetImageFullyAnnotated($"DECIBEL SPECTROGRAM ({sourceRecordingName})");
            list.Add(image3);
        }

        if (doNoiseReducedSpectrogram || doExperimentalSpectrogram || doDifferenceSpectrogram)
        {
            sonoConfig.NoiseReductionType = disabledNoiseReductionType;
            sonoConfig.NoiseReductionParameter = bgNoiseThreshold;
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram.Data);
            decibelSpectrogram.Data = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram.Data, spectralDecibelBgn);
            decibelSpectrogram.Data = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram.Data, nhThreshold: bgNoiseThreshold);

            // IMAGE 3) DecibelSpectrogram - noise reduced
            if (doNoiseReducedSpectrogram)
            {
                var image4 = decibelSpectrogram.GetImageFullyAnnotated($"DECIBEL SPECTROGRAM + Lamel noise subtraction. ({sourceRecordingName})");
                list.Add(image4);
            }

            // IMAGE 4) EXPERIMENTAL Spectrogram
            if (doExperimentalSpectrogram)
            {
                sonoConfig.NoiseReductionType = disabledNoiseReductionType;
                var image5 = GetDecibelSpectrogram_Ridges(dbSpectrogramData, decibelSpectrogram, sourceRecordingName);
                list.Add(image5);
            }

            // IMAGE 5) draw difference spectrogram
            if (doDifferenceSpectrogram)
            {
                var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
                var image6 = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
                image6 = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(image6, sampleRate, frameStep, $"DECIBEL DIFFERENCE SPECTROGRAM ({sourceRecordingName})");
                list.Add(image6);
            }
        }
    }

    // IMAGE 6) Cepstral Spectrogram
    if (doCepstralSpectrogram)
    {
        var image6 = GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName);
        list.Add(image6);
    }

    // 7) AmplitudeSpectrogram_LocalContrastNormalization
    if (doLcnSpectrogram)
    {
        var neighbourhoodSeconds = configInfo.GetDoubleOrNull("NeighbourhoodSeconds") ?? 0.5;
        var lcnContrastParameter = configInfo.GetDoubleOrNull("LcnContrastLevel") ?? 0.4;
        var image8 = GetLcnSpectrogram(sonoConfig, recordingSegment, sourceRecordingName, neighbourhoodSeconds, lcnContrastParameter);
        list.Add(image8);
    }

    // 8) SOX SPECTROGRAM
    //if (doSoxSpectrogram)
    //{
    //Log.Warn("SoX spectrogram set to true but is ignored when running as an IAnalyzer");

    // The following parameters were once used to implement a sox spectrogram.
    //bool makeSoxSonogram = configuration.GetBoolOrNull(AnalysisKeys.MakeSoxSonogram) ?? false;
    //configDict[AnalysisKeys.SonogramTitle] = configuration[AnalysisKeys.SonogramTitle] ?? "Sonogram";
    //configDict[AnalysisKeys.SonogramComment] = configuration[AnalysisKeys.SonogramComment] ?? "Sonogram produced using SOX";
    //configDict[AnalysisKeys.SonogramColored] = configuration[AnalysisKeys.SonogramColored] ?? "false";
    //configDict[AnalysisKeys.SonogramQuantisation] = configuration[AnalysisKeys.SonogramQuantisation] ?? "128";
    //configDict[AnalysisKeys.AddTimeScale] = configuration[AnalysisKeys.AddTimeScale] ?? "true";
    //configDict[AnalysisKeys.AddAxes] = configuration[AnalysisKeys.AddAxes] ?? "true";
    //configDict[AnalysisKeys.AddSegmentationTrack] = configuration[AnalysisKeys.AddSegmentationTrack] ?? "true";
    // var soxFile = new FileInfo(Path.Combine(output.FullName, sourceName + "SOX.png"));
    // SpectrogramTools.MakeSonogramWithSox(sourceRecording, configDict, path2SoxSpectrogram);
    // list.Add(image7);
    //}

    // COMBINE THE SPECTROGRAM IMAGES
    result.CompositeImage = ImageTools.CombineImagesVertically(list);
    return(result);
}
/// <summary>
/// Initializes a new instance of the <see cref="BaseSonogram"/> class.
/// BASE CONSTRUCTOR.
/// This constructor contains all steps required to prepare the amplitude spectrogram.
/// The third boolean parameter is simply a place-filler to ensure a different Constructor signature
/// from the principle Constructor which follows.
/// Returns early (leaving this.Data unset) if the recording is shorter than 0.2 seconds.
/// </summary>
/// <param name="config">config file to use.</param>
/// <param name="wav">wav.</param>
/// <param name="dummy">filler boolean. Calculate in method.</param>
public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy)
    : this(config)
{
    // As of 28 March 2017 drop capability to get sub-band of spectrogram because was not being used.
    // can be recovered later if desired.
    //bool doExtractSubband = this.SubBandMinHz > 0 || this.SubBandMaxHz < this.NyquistFrequency;

    this.Duration = wav.Time;
    double minDuration = 0.2;
    if (this.Duration.TotalSeconds < minDuration)
    {
        LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
        return;
    }

    //set config params to the current recording
    this.SampleRate = wav.SampleRate;
    this.Configuration.Duration = wav.Time;
    this.Configuration.SampleRate = wav.SampleRate; //also set the Nyquist
    this.MaxAmplitude = wav.CalculateMaximumAmplitude();

    var recording = new AudioRecording(wav);
    var fftData = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        config.WindowSize,
        config.WindowOverlap,
        this.Configuration.WindowFunction);

    // now recover required data
    //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
    this.Configuration.epsilon = fftData.Epsilon;
    this.Configuration.WindowPower = fftData.WindowPower;
    this.FrameCount = fftData.FrameCount;
    this.DecibelsPerFrame = fftData.FrameDecibels;

    //init normalised signal energy array but do nothing with it. This has to be done from outside
    this.DecibelsNormalised = new double[this.FrameCount];
    this.Data = fftData.AmplitudeSpectrogram;

    // ENERGY PER FRAME and NORMALISED dB PER FRAME AND SNR
    // currently DoSnr = true by default
    if (config.DoSnr)
    {
        // If the FractionOfHighEnergyFrames PRIOR to noise removal exceeds SNR.FractionalBoundForMode,
        // then Lamel's noise removal algorithm may not work well.
        if (fftData.FractionOfHighEnergyFrames > SNR.FractionalBoundForMode)
        {
            Log.WriteIfVerbose("\nWARNING ##############");
            Log.WriteIfVerbose(
                "\t############### BaseSonogram(): This is a high energy recording. Percent of high energy frames = {0:f0} > {1:f0}%",
                fftData.FractionOfHighEnergyFrames * 100,
                SNR.FractionalBoundForMode * 100);
            Log.WriteIfVerbose("\t############### Noise reduction algorithm may not work well in this instance!\n");
        }

        //AUDIO SEGMENTATION/END POINT DETECTION - based on Lamel et al
        // Setting segmentation/endpoint detection parameters is broken as of September 2014.
        // The next line is a hack replacement
        EndpointDetectionConfiguration.SetDefaultSegmentationConfig();
        this.SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsPerFrame, this.FrameStep);
    }

    /* AS OF 30 MARCH 2017, NO LONGER IMPLEMENT SUB-BAND THINGS, because not being used for years.
     * // EXTRACT REQUIRED FREQUENCY BAND
     * if (doExtractSubband)
     * {
     *     this.Data = SpectrogramTools.ExtractFreqSubband(
     *         this.Data,
     *         this.subBandMinHz,
     *         this.subBandMaxHz,
     *         this.Configuration.DoMelScale,
     *         this.Configuration.FreqBinCount,
     *         this.FBinWidth);
     *     this.CalculateSubbandSNR(this.Data);
     * }
     */
}
/// <summary>
/// Determines the signal-to-noise ratio of a recording.
/// Converts the source audio to a temp wav, extracts the envelope and amplitude/decibel
/// spectrograms, writes signal statistics to a text file, and saves a combined image of the
/// original decibel spectrogram plus a Briggs noise-filtered mask.
/// </summary>
/// <param name="arguments">Source recording, config file, and output directory.</param>
public static void Execute(Arguments arguments)
{
    const string Title = "# DETERMINING SIGNAL TO NOISE RATIO IN RECORDING";
    string date = "# DATE AND TIME: " + DateTime.Now;
    Log.WriteLine(Title);
    Log.WriteLine(date);
    Log.Verbosity = 1;

    var input = arguments.Source;
    var outputDir = arguments.Output;
    var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(input.FullName);
    var outputTxtPath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".txt").ToFileInfo();

    Log.WriteIfVerbose("# Recording file: " + input.FullName);
    Log.WriteIfVerbose("# Config file: " + arguments.Config);
    Log.WriteIfVerbose("# Output folder =" + outputDir.FullName);
    FileTools.WriteTextFile(outputTxtPath.FullName, date + "\n# Recording file: " + input.FullName);

    // load YAML configuration
    Config configuration = ConfigFile.Deserialize(arguments.Config);

    // Set up the sonogram configuration; missing keys fall back to defaults.
    var sonoConfig = new SonogramConfig
    {
        SourceFName = input.FullName,
        WindowSize = configuration.GetIntOrNull(AnalysisKeys.KeyFrameSize) ?? 512,
        WindowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.5,
        WindowFunction = configuration[AnalysisKeys.KeyWindowFunction],
        NPointSmoothFFT = configuration.GetIntOrNull(AnalysisKeys.KeyNPointSmoothFft) ?? 256,
        NoiseReductionType = SNR.KeyToNoiseReductionType(configuration[AnalysisKeys.NoiseReductionType]),
    };

    // NOTE: segmentation parameters (MIN_HZ, MAX_HZ, SEGMENTATION_THRESHOLD_K1/K2, K1_K2_LATENCY,
    // VOCAL_GAP, MIN_VOCAL_DURATION) were previously read from config but never used; removed.

    // Convert input recording into wav at the target sample rate.
    var convertParameters = new AudioUtilityRequest { TargetSampleRate = 17640 };
    var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));
    if (File.Exists(fileToAnalyse.FullName))
    {
        File.Delete(fileToAnalyse.FullName);
    }

    // Side effect only: writes the converted file to disk; return value is not needed here.
    AudioFilePreparer.PrepareFile(input, fileToAnalyse, convertParameters, outputDir);

    // (A) ##########################################################################################
    AudioRecording recording = new AudioRecording(fileToAnalyse.FullName);
    int signalLength = recording.WavReader.Samples.Length;
    TimeSpan wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);

    // Frame/step geometry derived from the window size and overlap.
    double frameDurationInSeconds = sonoConfig.WindowSize / (double)recording.SampleRate;
    TimeSpan frameDuration = TimeSpan.FromTicks((long)(frameDurationInSeconds * TimeSpan.TicksPerSecond));
    int stepSize = (int)Math.Floor(sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap));
    double stepDurationInSeconds = sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap) / recording.SampleRate;
    TimeSpan stepDuration = TimeSpan.FromTicks((long)(stepDurationInSeconds * TimeSpan.TicksPerSecond));
    double framesPerSecond = 1 / stepDuration.TotalSeconds;
    int frameCount = signalLength / stepSize;

    // (B) ################################## EXTRACT ENVELOPE and SPECTROGRAM ##################################
    var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, sonoConfig.WindowSize, sonoConfig.WindowOverlap);

    // (C) ################################## GET SIGNAL WAVEFORM ##################################
    double[] signalEnvelope = dspOutput.Envelope;

    // (D) ################################## GET Amplitude Spectrogram ##################################
    double[,] amplitudeSpectrogram = dspOutput.AmplitudeSpectrogram;

    // (E) ################################## Generate deciBel spectrogram from amplitude spectrogram
    // epsilon = smallest representable amplitude for this bit depth (1 bit).
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(
        dspOutput.AmplitudeSpectrogram,
        dspOutput.WindowPower,
        recording.SampleRate,
        epsilon);
    LoggedConsole.WriteLine("# Finished calculating decibel spectrogram.");

    // Assemble the report of signal, frame, frequency and energy statistics.
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("\nSIGNAL PARAMETERS");
    sb.AppendLine("Signal Duration =" + wavDuration);
    sb.AppendLine("Sample Rate =" + recording.SampleRate);
    sb.AppendLine("Min Signal Value =" + dspOutput.MinSignalValue);
    sb.AppendLine("Max Signal Value =" + dspOutput.MaxSignalValue);
    sb.AppendLine("Max Absolute Ampl =" + signalEnvelope.Max().ToString("F3") + " (See Note 1)");
    sb.AppendLine("Epsilon Ampl (1 bit)=" + epsilon);

    sb.AppendLine("\nFRAME PARAMETERS");
    sb.AppendLine("Window Size =" + sonoConfig.WindowSize);
    sb.AppendLine("Frame Count =" + frameCount);
    sb.AppendLine("Envelope length=" + signalEnvelope.Length);
    sb.AppendLine("Frame Duration =" + frameDuration.TotalMilliseconds.ToString("F3") + " ms");
    sb.AppendLine("Frame overlap =" + sonoConfig.WindowOverlap);
    sb.AppendLine("Step Size =" + stepSize);
    sb.AppendLine("Step duration =" + stepDuration.TotalMilliseconds.ToString("F3") + " ms");
    sb.AppendLine("Frames Per Sec =" + framesPerSecond.ToString("F1"));

    sb.AppendLine("\nFREQUENCY PARAMETERS");
    sb.AppendLine("Nyquist Freq =" + dspOutput.NyquistFreq + " Hz");
    sb.AppendLine("Freq Bin Width =" + dspOutput.FreqBinWidth.ToString("F2") + " Hz");
    sb.AppendLine("Nyquist Bin =" + dspOutput.NyquistBin);

    sb.AppendLine("\nENERGY PARAMETERS");
    double val = dspOutput.FrameEnergy.Min();
    sb.AppendLine("Minimum dB / frame =" + (10 * Math.Log10(val)).ToString("F2") + " (See Notes 2, 3 & 4)");
    val = dspOutput.FrameEnergy.Max();
    sb.AppendLine("Maximum dB / frame =" + (10 * Math.Log10(val)).ToString("F2"));

    sb.AppendLine("\ndB NOISE SUBTRACTION");
    double noiseRange = 2.0;

    FileTools.Append2TextFile(outputTxtPath.FullName, sb.ToString());
    FileTools.Append2TextFile(outputTxtPath.FullName, GetSNRNotes(noiseRange).ToString());

    // (F) ################################## DRAW IMAGE 1: original spectrogram
    Log.WriteLine("# Start drawing noise reduced sonograms.");
    TimeSpan xAxisInterval = TimeSpan.FromSeconds(1);
    int nyquist = recording.SampleRate / 2;
    int hzInterval = 1000;
    var image1 = DrawSonogram(deciBelSpectrogram, wavDuration, xAxisInterval, stepDuration, nyquist, hzInterval);

    // (H) ################################## Calculate BRIGGS noise removal from amplitude spectrum
    int percentileBound = 20;   // low energy percentile for noise removal
    double binaryThreshold = 0.4;   // works for lower SNR recordings (0.6 suits higher SNR)
    double[,] m = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetMask(
        amplitudeSpectrogram,
        percentileBound,
        binaryThreshold);
    string title = "TITLE";
    var image2 = NoiseRemoval_Briggs.DrawSonogram(m, wavDuration, xAxisInterval, stepDuration, nyquist, hzInterval, title);

    // Stack the two sonograms into one output image and save alongside the text report.
    var combinedImage = ImageTools.CombineImagesVertically(image1, image2);
    string imagePath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".png");
    combinedImage.Save(imagePath);

    Log.WriteLine("# Finished recording:- " + input.Name);
}
/// <summary>
/// Calculates the six spectral indices (OSC, ACI, ENT, BGN, PMN, EVN) used for content description
/// of one recording segment.
/// </summary>
/// <param name="recording">The recording segment to analyse.</param>
/// <param name="segmentOffsetTimeSpan">Offset of this segment within the source recording
/// (currently unused in the calculation; retained for interface compatibility).</param>
/// <param name="sampleRateOfOriginalAudioFile">Sample rate of the original audio; used to cap the
/// Nyquist bin when the segment has been up-sampled.</param>
/// <param name="returnSonogramInfo">Unused; retained for interface compatibility
/// (was used by the superseded IndexCalculateResult pathway).</param>
/// <returns>A <see cref="SpectralIndexValuesForContentDescription"/> populated with the six spectral indices.</returns>
public static SpectralIndexValuesForContentDescription Analysis(
    AudioRecording recording,
    TimeSpan segmentOffsetTimeSpan,
    int sampleRateOfOriginalAudioFile,
    bool returnSonogramInfo = false)
{
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    // FRAME parameters for the calculation of acoustic indices; windowOverlap is zero.
    int frameSize = ContentSignatures.FrameSize;
    int frameStep = frameSize;
    double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    // Results structure; config supplies default low/mid frequency band boundaries.
    var config = new IndexCalculateConfig();
    var spectralIndices = new SpectralIndexValuesForContentDescription();

    // ################################## NOW GET THE AMPLITUDE SPECTROGRAM
    // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
    var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

    // (B) ########## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##########
    // OSC is computed separately from the signal because it needs a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

    // Double the length of the vector because want to work with 256 element vector for spectrogram purposes.
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // (C) ########## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##########
    // IFF there has been UP-SAMPLING, cap the Nyquist at that of the original audio (< SR/2).
    // Original sample rate can be anything 11.0-44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
    }

    // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

    // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum, reversed (1 - Ht) for energy concentration.
    double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int i = 0; i < temporalEntropySpectrum.Length; i++)
    {
        temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
    }

    spectralIndices.ENT = temporalEntropySpectrum;

    // (C) ########## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##########
    // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile.
    // FIX: previously this spectrogram was recomputed a second time with identical arguments before
    // noise truncation; computed once here, matching GetDecibelSpectrogramNoiseReduced().
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    spectralIndices.BGN = spectralDecibelBgn;

    // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

    // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced (all values >= 0.0).
    // The low/mid band boundary avoids low freq bins containing anthropogenic noise,
    // which biased index values away from bio-phony.
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;
    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
    var spActivity = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);
    spectralIndices.EVN = spActivity.EventSpectrum;

    return spectralIndices;
} // end calculation of Six Spectral Indices
/// <summary>
/// Does the analysis: detects grating/barred events in the amplitude spectrogram of the
/// given audio segment and converts them to acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">the audio segment to analyse.</param>
/// <param name="configDict">analysis parameters; must contain key_INTENSITY_THRESHOLD.</param>
/// <param name="diOutputDir">directory for any resampled/intermediate output.</param>
/// <param name="opFileName">name for the resampled recording file.</param>
/// <param name="segmentStartOffset">offset of this segment within the source recording.</param>
/// <returns>
/// Tuple of (sonogram, hits matrix, amplitude array for debugging, detected acoustic events,
/// recording duration), or null if the recording could not be obtained.
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, DirectoryInfo diOutputDir, string opFileName, TimeSpan segmentStartOffset)
{
    //set default values
    int bandWidth = 500; //detect bars in bands of this width.
    int frameSize = 1024;
    double windowOverlap = 0.0;
    double intensityThreshold = double.Parse(configDict[key_INTENSITY_THRESHOLD]);
    //intensityThreshold = 0.01;

    AudioRecording recording = AudioRecording.GetAudioRecording(fiSegmentOfSourceFile, RESAMPLE_RATE, diOutputDir.FullName, opFileName);
    if (recording == null)
    {
        LoggedConsole.WriteLine("############ WARNING: Recording could not be obtained - likely file does not exist.");
        return(null);
    }

    // Derived frame geometry.
    int sr = recording.SampleRate;
    double binWidth = recording.SampleRate / (double)frameSize;
    double frameDuration = frameSize / (double)sr;
    double frameOffset = frameDuration * (1 - windowOverlap); //seconds between start of each frame
    double framesPerSecond = 1 / frameOffset;
    TimeSpan tsRecordingtDuration = recording.Duration;
    // Number of frequency bins per analysis band.
    int colStep = (int)Math.Round(bandWidth / binWidth);

    //i: GET SONOGRAM AS MATRIX
    // epsilon = smallest representable amplitude for this bit depth.
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, sr, epsilon, frameSize, windowOverlap);
    double[] avAbsolute = results2.Average; //average absolute value over the minute recording
    //double[] envelope = results2.Item2;
    // Amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    double[,] spectrogram = results2.AmplitudeSpectrogram;
    double windowPower = results2.WindowPower;

    //############################ NEXT LINE FOR DEBUGGING ONLY
    //spectrogram = GetTestSpectrogram(spectrogram.GetLength(0), spectrogram.GetLength(1), 0.01, 0.03);

    var output = DetectGratingEvents(spectrogram, colStep, intensityThreshold);
    var amplitudeArray = output.Item2; //for debug purposes only

    // Convert List of Dictionary events to List of AcousticEvents.
    // Also set up the hits matrix.
    int rowCount = spectrogram.GetLength(0);
    int colCount = spectrogram.GetLength(1);
    var hitsMatrix = new double[rowCount, colCount];
    var acousticEvents = new List<AcousticEvent>();

    double minFrameCount = 8; //this assumes that the minimum grid is 2 * 4 = 8 long

    foreach (Dictionary<string, double> item in output.Item1)
    {
        int minRow = (int)item[key_START_FRAME];
        int maxRow = (int)item[key_END_FRAME];
        int frameCount = maxRow - minRow + 1;
        if (frameCount < minFrameCount)
        {
            continue; //only want events that are over a minimum length
        }

        int minCol = (int)item[key_MIN_FREQBIN];
        int maxCol = (int)item[key_MAX_FREQBIN];
        double periodicity = item[key_PERIODICITY];

        // Crop the event rows to the first and last envelope peaks within the event.
        double[] subarray = DataTools.Subarray(avAbsolute, minRow, maxRow - minRow + 1);
        double severity = 0.1;
        int[] bounds = DataTools.Peaks_CropToFirstAndLast(subarray, severity);
        minRow = minRow + bounds[0];
        // NOTE(review): maxRow is offset from the ALREADY-shifted minRow, i.e.
        // maxRow = originalMinRow + bounds[0] + bounds[1]. Verify that bounds[1] is
        // relative to bounds[0] and not to the subarray start.
        maxRow = minRow + bounds[1];
        if (maxRow >= rowCount)
        {
            maxRow = rowCount - 1;
        }

        Oblong o = new Oblong(minRow, minCol, maxRow, maxCol);
        // NOTE(review): frameCount here is the PRE-crop length, although minRow/maxRow
        // were just cropped above — confirm this is intended.
        var ae = new AcousticEvent(segmentStartOffset, o, results2.NyquistFreq, frameSize, frameDuration, frameOffset, frameCount);
        ae.Name = string.Format("p={0:f0}", periodicity);
        ae.Score = item[key_SCORE];
        ae.ScoreNormalised = item[key_SCORE] / 0.5;
        acousticEvents.Add(ae);

        //display event on the hits matrix
        for (int r = minRow; r < maxRow; r++)
        {
            for (int c = minCol; c < maxCol; c++)
            {
                hitsMatrix[r, c] = periodicity;
            }
        }
    } //foreach

    // Set up the sonogram to return. Use the existing amplitude sonogram.
    int bitsPerSample = recording.WavReader.BitsPerSample;
    //NoiseReductionType nrt = SNR.Key2NoiseReductionType("NONE");
    NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
    var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(recording.BaseName, frameSize, windowOverlap, bitsPerSample, windowPower, sr, tsRecordingtDuration, nrt, spectrogram);

    // Derive normalised per-frame decibels from the average absolute amplitude.
    sonogram.DecibelsNormalised = new double[sonogram.FrameCount];
    for (int i = 0; i < sonogram.FrameCount; i++) //foreach frame or time step
    {
        sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
    }

    sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
    return(Tuple.Create(sonogram, hitsMatrix, amplitudeArray, acousticEvents, tsRecordingtDuration));
} //Analysis()
public static void Main(Arguments arguments) { //1. set up the necessary files //DirectoryInfo diSource = arguments.Source.Directory; FileInfo fiSourceRecording = arguments.Source; FileInfo fiConfig = arguments.Config.ToFileInfo(); FileInfo fiImage = arguments.Output.ToFileInfo(); fiImage.CreateParentDirectories(); string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING"; string date = "# DATE AND TIME: " + DateTime.Now; LoggedConsole.WriteLine(title); LoggedConsole.WriteLine(date); LoggedConsole.WriteLine("# Input audio file: " + fiSourceRecording.Name); LoggedConsole.WriteLine("# Output image file: " + fiImage); //2. get the config dictionary Config configuration = ConfigFile.Deserialize(fiConfig); //below three lines are examples of retrieving info from Config config //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName]; //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false; //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold; //3 transfer conogram parameters to a dictionary to be passed around var configDict = new Dictionary <string, string>(); // #Resample rate must be 2 X the desired Nyquist. Default is that of recording. configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString(); configDict["FrameLength"] = configuration[AnalysisKeys.FrameLength] ?? "512"; int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512; // #Frame Overlap as fraction: default=0.0 configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0"; double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0; // #MinHz: 500 // #MaxHz: 3500 // #NOISE REDUCTION PARAMETERS configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true"; configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? 
"3.0"; configDict["ADD_AXES"] = configuration["ADD_AXES"] ?? "true"; configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true"; // 3: GET RECORDING var startOffsetMins = TimeSpan.Zero; var endOffsetMins = TimeSpan.Zero; FileInfo fiOutputSegment = fiSourceRecording; if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero)) { var buffer = new TimeSpan(0, 0, 0); fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav")); //This method extracts segment and saves to disk at the location fiOutputSegment. var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? AppConfigHelper.DefaultTargetSampleRate; AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment); } var recording = new AudioRecording(fiOutputSegment.FullName); // EXTRACT ENVELOPE and SPECTROGRAM var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap); // average absolute value over the minute recording ////double[] avAbsolute = dspOutput.Average; // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ################################## // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds); // double totalSeconds = wavDuration.TotalSeconds; // double[] signalEnvelope = dspOutput.Envelope; // double avSignalEnvelope = signalEnvelope.Average(); // double[] frameEnergy = dspOutput.FrameEnergy; // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds; // double binWidth = dspOutput.BinWidth; // int nyquistBin = dspOutput.NyquistBin; // dspOutput.WindowPower, // dspOutput.FreqBinWidth int nyquistFreq = dspOutput.NyquistFreq; double epsilon = recording.Epsilon; // i: prepare amplitude spectrogram double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram. 
var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData)); // ii: prepare decibel spectrogram prior to noise removal double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon); decibelSpectrogramdata = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata); var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata)); // iii: Calculate background noise spectrum in decibels // Calculate noise value for each freq bin. double sdCount = 0.0; // number of SDs above the mean for noise removal var decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount); // DataTools.writeBarGraph(dBProfile.NoiseMode); // iv: Prepare noise reduced spectrogram decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds); //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold); var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata)); // prepare new sonogram config and draw second image going down different code pathway var config = new SonogramConfig { MinFreqBand = 0, MaxFreqBand = 10000, NoiseReductionType = SNR.KeyToNoiseReductionType("Standard"), NoiseReductionParameter = 1.0, WindowSize = frameSize, WindowOverlap = windowOverlap, }; //var mfccConfig = new MfccConfiguration(config); int bandCount = config.mfccConfig.FilterbankCount; bool doMelScale = config.mfccConfig.DoMelScale; int ccCount = config.mfccConfig.CcCount; int fftBins = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band int minHz = config.MinFreqBand ?? 0; int maxHz = config.MaxFreqBand ?? 
nyquistFreq; var standardSonogram = new SpectrogramStandard(config, recording.WavReader); var image4 = standardSonogram.GetImage(); // TODO next line crashes - does not produce cepstral sonogram. //SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader); //Image image5 = cepSng.GetImage(); //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict); //var image = mti.GetImage(); //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config); //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict); //prepare sonogram images var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false)); protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond)); protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width)); protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram)); var image6 = protoImage6.GetImage(); var list = new List <Image <Rgb24> >(); list.Add(image1); // amplitude spectrogram list.Add(image2); // decibel spectrogram before noise removal list.Add(image3); // decibel spectrogram after noise removal list.Add(image4); // second version of noise reduced spectrogram //list.Add(image5); // ceptral sonogram list.Add(image6.CloneAs <Rgb24>()); // multitrack image Image finalImage = ImageTools.CombineImagesVertically(list); finalImage.Save(fiImage.FullName); ////2: NOISE REMOVAL //double[,] originalSg = sonogram.Data; //double[,] mnr = sonogram.Data; //mnr = ImageTools.WienerFilter(mnr, 3); //double backgroundThreshold = 4.0; //SETS MIN DECIBEL BOUND //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold); //double ConfigRange = 70; //sets the the max dB //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange); ////3: Spectral tracks sonogram //byte[,] binary = 
MatrixTools.IdentifySpectralRidges(mnr); //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10); //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary); ////binary = MatrixTools.PickOutLines(binary); //syntactic approach //sonogram.SetBinarySpectrum(binary); ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_tracks.png"; //image.Save(fn); //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn); //3: prepare image of spectral peaks sonogram //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_peaks.png"; //image.Save(fn); //LoggedConsole.WriteLine("Spectral peaks sonogram to file: " + fn); //4: Sobel approach //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_sobel.png"; //image.Save(fn); //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn); // I1.txt contains the sonogram matrix produced by matlab //string matlabFile = 
@"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt"; //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166); //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]); //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]); // TODO put this back once sonogram issues resolved /* * LoggedConsole.WriteLine("START: AED"); * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix); * LoggedConsole.WriteLine("END: AED"); * * * //set up static variables for init Acoustic events * //AcousticEvent. doMelScale = config.DoMelScale; * AcousticEvent.FreqBinCount = config.FreqBinCount; * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount; * // int minF = (int)config.MinFreqBand; * // int maxF = (int)config.MaxFreqBand; * AcousticEvent.FrameDuration = config.GetFrameOffset(); * * * var events = new List<EventPatternRecog.Rectangle>(); * foreach (Oblong o in oblongs) * { * var e = new AcousticEvent(o); * events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq)); * //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq); * } * * LoggedConsole.WriteLine("# AED events: " + events.Count); * * LoggedConsole.WriteLine("START: EPR"); * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events); * LoggedConsole.WriteLine("END: EPR"); * * var eprEvents = new List<AcousticEvent>(); * foreach (EventPatternRecog.Rectangle r in eprRects) * { * var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false); * LoggedConsole.WriteLine(ae.WriteProperties()); * eprEvents.Add(ae); * } * * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png"); * * bool doHighlightSubband = false; bool add1kHzLines = true; * var image = 
new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); * image.AddEvents(eprEvents); * image.Save(outputFolder + wavFileName + ".png"); */ LoggedConsole.WriteLine("\nFINISHED!"); }
/// <summary>
/// Returns the duration of that part of a frame NOT overlapped with the following frame,
/// i.e. the frame step, expressed as a TimeSpan in seconds.
/// Assumes the window size and overlap fraction are already known.
/// </summary>
/// <param name="windowSize">frame length in samples.</param>
/// <param name="windowOverlap">fraction of the frame overlapped by the next frame.</param>
/// <param name="sampleRate">sample rate of the signal in hertz.</param>
public static TimeSpan GetFrameOffset(int windowSize, double windowOverlap, int sampleRate)
{
    // frame step in samples, as computed by the shared DSP helper
    int stepSamples = DSP_Frames.FrameStep(windowSize, windowOverlap);

    // convert the sample count to seconds
    double stepSeconds = stepSamples / (double)sampleRate;
    return TimeSpan.FromSeconds(stepSeconds);
}
/// <summary>
/// Calculates similarity and decibel-bias statistics between the left and right channels of a stereo recording.
/// Both channels are converted to long-window (8192 sample) average amplitude spectra; the similarity index is the
/// mean of the squared min/max ratio per frequency bin, and the various bias outputs are mean left-minus-right
/// decibel differences over the whole spectrum, the low band (0-2kHz), mid band (2-7kHz) and high band (7kHz-nyquist).
/// NOTE(review): assumes both channels produce spectra of the same length — TODO confirm upstream callers.
/// </summary>
public static void SimilarityIndex(double[] channelL, double[] channelR, double epsilon, int sampleRate,
    out double similarityIndex, out double decibelIndex, out double avDecibelBias, out double medianDecibelBias,
    out double lowFreqDbBias, out double midFreqDbBias, out double hiFreqDbBias)
{
    // Amplitude floor, used both to prevent division by zero and to stop Math.Log10(0) = -Infinity
    // from contaminating the accumulated decibel indices.
    const double minAmplitude = 0.000001;

    // Convert an average bin amplitude to decibels, clamped at the floor.
    double ToDecibels(double amplitude) => 20 * Math.Log10(Math.Max(amplitude, minAmplitude));

    //var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep);
    // take longer window (512 * 16 = 8192 samples) to get low freq resolution
    int frameSize = 512 * 16;
    int frameStep = frameSize;

    var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
    var avSpectrumL = MatrixTools.GetColumnAverages(dspOutputL.AmplitudeSpectrogram);
    //var medianSpectrumL = MatrixTools.GetColumnMedians(dspOutputL.amplitudeSpectrogram);

    var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
    var avSpectrumR = MatrixTools.GetColumnAverages(dspOutputR.AmplitudeSpectrogram);
    //var medianSpectrumR = MatrixTools.GetColumnMedians(dspOutputR.amplitudeSpectrogram);

    similarityIndex = 0.0;
    decibelIndex = 0.0;
    for (int i = 0; i < avSpectrumR.Length; i++)
    {
        double min = Math.Min(avSpectrumL[i], avSpectrumR[i]);
        double max = Math.Max(avSpectrumL[i], avSpectrumR[i]);
        if (max <= minAmplitude)
        {
            max = minAmplitude; // to prevent division by zero.
        }

        // index = min / max;
        // squared ratio gives more contrast than the plain ratio.
        double index = min * min / (max * max);
        similarityIndex += index;

        // BUG FIX: the original clamped only max, so a silent bin (min == 0) produced
        // 20 * Math.Log10(0) = -Infinity and decibelIndex accumulated to +Infinity.
        // Both values are now clamped at the same amplitude floor before conversion.
        decibelIndex += ToDecibels(max) - ToDecibels(min);
    }

    similarityIndex /= avSpectrumR.Length;
    decibelIndex /= avSpectrumR.Length;

    double medianLeft = Statistics.GetMedian(avSpectrumL);
    double medianRight = Statistics.GetMedian(avSpectrumR);
    medianDecibelBias = medianLeft - medianRight;

    // init values
    avDecibelBias = 0.0;
    lowFreqDbBias = 0.0;

    // calculate the freq band bounds for 2kHz and 7khz.
    int lowBound = frameSize * 2000 / sampleRate;
    int midBound = frameSize * 7000 / sampleRate;

    // low band: 0 - 2 kHz (also clamped: the original took Log10 of possibly zero bins here)
    for (int i = 0; i < lowBound; i++)
    {
        double dbBias = ToDecibels(avSpectrumL[i]) - ToDecibels(avSpectrumR[i]);
        avDecibelBias += dbBias;
        lowFreqDbBias += dbBias;
    }

    // mid band: 2 - 7 kHz
    midFreqDbBias = 0.0;
    for (int i = lowBound; i < midBound; i++)
    {
        double dbBias = ToDecibels(avSpectrumL[i]) - ToDecibels(avSpectrumR[i]);
        avDecibelBias += dbBias;
        midFreqDbBias += dbBias;
    }

    // high band: 7 kHz - nyquist
    hiFreqDbBias = 0.0;
    for (int i = midBound; i < avSpectrumR.Length; i++)
    {
        double dbBias = ToDecibels(avSpectrumL[i]) - ToDecibels(avSpectrumR[i]);
        avDecibelBias += dbBias;
        hiFreqDbBias += dbBias;
    }

    avDecibelBias /= avSpectrumR.Length;
    lowFreqDbBias /= lowBound;
    midFreqDbBias /= midBound - lowBound;
    hiFreqDbBias /= avSpectrumR.Length - midBound;
}
/// <summary>
/// Regression test for <c>DSP_Frames.ExtractEnvelopeAndFfts</c>: verifies the clipping/signal-level
/// values and the envelope-related arrays (average, envelope, frame energy, frame decibels) against
/// serialized expected data in the test assets, plus the derived frequency-scale values.
/// Expected .bin fixtures are regenerated with the commented-out Binary.Serialize lines below.
/// </summary>
public void TestEnvelopeAndFft2()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
    int windowSize = 512;

    // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
    double windowOverlap = 0.0;
    var windowFunction = WindowFunctions.HAMMING.ToString();

    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        windowSize,
        windowOverlap,
        windowFunction);

    // Now recover the data
    /*
     * // The following data is required when constructing sonograms
     * var duration = recording.WavReader.Time;
     * var sr = recording.SampleRate;
     * var frameCount = fftdata.FrameCount;
     * var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;
     * var epislon = fftdata.Epsilon;
     * var windowPower = fftdata.WindowPower;
     * var amplSpectrogram = fftdata.AmplitudeSpectrogram;
     */

    // The below info is only used when calculating spectral and summary indices
    // energy level information
    int clipCount = fftdata.ClipCount;
    int maxAmpCount = fftdata.HighAmplitudeCount;
    double maxSig = fftdata.MaxSignalValue;
    double minSig = fftdata.MinSignalValue;

    // envelope info
    var avArray = fftdata.Average;
    var envelope = fftdata.Envelope;
    var frameEnergy = fftdata.FrameEnergy;
    var frameDecibels = fftdata.FrameDecibels;

    // freq scale info
    var nyquistBin = fftdata.NyquistBin;
    var nyquistFreq = fftdata.NyquistFreq;
    var freqBinWidth = fftdata.FreqBinWidth;

    // DO THE TESTS of clipping and signal level info
    // energy level information
    Assert.AreEqual(0, clipCount);
    Assert.AreEqual(0, maxAmpCount);
    Assert.AreEqual(-0.250434888760033, minSig, 0.000001);
    Assert.AreEqual(0.255165257728813, maxSig, 0.000001);

    // DO THE TESTS of energy array info
    // first write to here and move binary file to resources folder.
    // var averageArrayFile = new FileInfo(this.outputDirectory + @"\BAC2_20071008-085040_AvSigArray.bin");
    // Binary.Serialize(averageArrayFile, avArray);
    var averageFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_AvSigArray.bin");
    var expectedAvArray = Binary.Deserialize<double[]>(averageFile);
    CollectionAssert.AreEqual(expectedAvArray, avArray);

    // var envelopeArrayFile = new FileInfo(this.outputDirectory + @"\BAC2_20071008-085040_EnvelopeArray.bin");
    // Binary.Serialize(envelopeArrayFile, envelope);
    var envelopeFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_EnvelopeArray.bin");
    var expectedEnvelope = Binary.Deserialize<double[]>(envelopeFile);
    CollectionAssert.AreEqual(expectedEnvelope, envelope);

    var frameEnergyFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_FrameEnergyArray.bin");

    // uncomment this to update the binary data. Should be rarely needed
    // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
    //Binary.Serialize(frameEnergyFile, frameEnergy);
    var expectedFrameEnergy = Binary.Deserialize<double[]>(frameEnergyFile);
    CollectionAssert.AreEqual(expectedFrameEnergy, frameEnergy);

    var frameDecibelsFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_FrameDecibelsArray.bin");

    // uncomment this to update the binary data. Should be rarely needed
    // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
    //Binary.Serialize(frameDecibelsFile, frameDecibels);
    var expectedFrameDecibels = Binary.Deserialize<double[]>(frameDecibelsFile);

    // frame decibels compared with tolerance Delta (class-level field)
    CollectionAssert.That.AreEqual(expectedFrameDecibels, frameDecibels, Delta);

    // freq info
    Assert.AreEqual(255, nyquistBin);
    Assert.AreEqual(11025, nyquistFreq);
    Assert.AreEqual(43.0664, freqBinWidth, 0.00001);
}
/// <summary>
/// Extracts summary and spectral acoustic indices from a (sub)segment of a recording.
/// Pipeline: select the subsegment, compute an amplitude spectrogram (optionally octave or mel scale),
/// optionally compute background noise from a longer buffered segment, then derive waveform indices
/// (activity, SNR, temporal entropy, etc.), amplitude-spectrogram indices (ACI, ENT, OSC, SUM, NDSI),
/// decibel-spectrogram indices (BGN, PMN, CVR, EVN, spectral peak tracks) and, for long enough
/// segments, spectral cluster counts.
/// </summary>
/// <param name="recording">the segment of audio to analyse.</param>
/// <param name="subsegmentOffsetTimeSpan">offset of the subsegment relative to the start of the original file.</param>
/// <param name="indexProperties">properties of the indices to calculate.</param>
/// <param name="sampleRateOfOriginalAudioFile">used to detect up-sampling and cap the nyquist bin.</param>
/// <param name="segmentStartOffset">offset of this segment relative to the start of the original file.</param>
/// <param name="config">index-calculation configuration (frame length, freq bounds, frequency scale, etc.).</param>
/// <param name="returnSonogramInfo">if true, a default sonogram is attached to the result.</param>
/// <returns>a result object containing SummaryIndexValues, SpectralIndexValues and optional debug artefacts.</returns>
public static IndexCalculateResult Analysis(
    AudioRecording recording,
    TimeSpan subsegmentOffsetTimeSpan,
    Dictionary<string, IndexProperties> indexProperties,
    int sampleRateOfOriginalAudioFile,
    TimeSpan segmentStartOffset,
    IndexCalculateConfig config,
    bool returnSonogramInfo = false)
{
    // returnSonogramInfo = true; // if debugging
    double epsilon = recording.Epsilon;
    int signalLength = recording.WavReader.GetChannel(0).Length;
    int sampleRate = recording.WavReader.SampleRate;
    var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    var indexCalculationDuration = config.IndexCalculationDurationTimeSpan;
    int nyquist = sampleRate / 2;

    // Get FRAME parameters for the calculation of Acoustic Indices
    //WARNING: DO NOT USE Frame Overlap when calculating acoustic indices.
    // It yields ACI, BGN, POW and EVN results that are significantly different from the default.
    // I have not had time to check if the difference is meaningful. Best to avoid.
    //int frameSize = (int?)config[AnalysisKeys.FrameLength] ?? IndexCalculateConfig.DefaultWindowSize;
    int frameSize = config.FrameLength;
    int frameStep = frameSize; // that is, windowOverlap = zero

    double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;

    int freqBinCount = frameSize / 2;

    // double freqBinWidth = recording.Nyquist / (double)freqBinCount;

    // get duration in seconds and sample count and frame count
    double subsegmentDurationInSeconds = indexCalculationDuration.TotalSeconds;
    int subsegmentSampleCount = (int)(subsegmentDurationInSeconds * sampleRate);
    double subsegmentFrameCount = subsegmentSampleCount / (double)frameStep;
    subsegmentFrameCount = (int)Math.Ceiling(subsegmentFrameCount);

    // In order not to lose the last fractional frame, round up the frame number
    // and get the exact number of samples in the integer number of frames.
    // Do this because when IndexCalculationDuration = 100ms, the number of frames is only 8.
    subsegmentSampleCount = (int)(subsegmentFrameCount * frameStep);

    // get start and end samples of the subsegment and noise segment
    double localOffsetInSeconds = subsegmentOffsetTimeSpan.TotalSeconds - segmentStartOffset.TotalSeconds;
    int startSample = (int)(localOffsetInSeconds * sampleRate);
    int endSample = startSample + subsegmentSampleCount - 1;

    // Default behaviour: set SUBSEGMENT = total recording
    var subsegmentRecording = recording;

    // But if the indexCalculationDuration < segmentDuration
    if (indexCalculationDuration < segmentDuration)
    {
        // minimum samples needed to calculate acoustic indices. This value was chosen somewhat arbitrarily.
        // It allowes for case where IndexCalculationDuration = 100ms which is approx 8 frames
        int minimumViableSampleCount = frameSize * 8;
        int availableSignal = signalLength - startSample;

        // if (the required audio is beyond recording OR insufficient for analysis) then backtrack.
        if (availableSignal < minimumViableSampleCount)
        {
            // Back-track so we can fill a whole result.
            // This is a silent correction, equivalent to having a segment overlap for the last segment.
            var oldStart = startSample;
            startSample = signalLength - subsegmentSampleCount;
            endSample = signalLength;

            Logger.Trace(" Backtrack subsegment to fill missing data from imperfect audio cuts because not enough samples available. " + (oldStart - startSample) + " samples overlap.");
        }

        var subsamples = DataTools.Subarray(recording.WavReader.Samples, startSample, subsegmentSampleCount);
        var wr = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate);
        subsegmentRecording = new AudioRecording(wr);
    }

    // INITIALISE a RESULTS STRUCTURE TO return
    // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
    var result = new IndexCalculateResult(freqBinCount, indexProperties, indexCalculationDuration, subsegmentOffsetTimeSpan, config);
    SummaryIndexValues summaryIndices = result.SummaryIndexValues;
    SpectralIndexValues spectralIndices = result.SpectralIndexValues;

    // set up default spectrogram to return
    result.Sg = returnSonogramInfo ? GetSonogram(recording, windowSize: 1024) : null;
    result.Hits = null;
    result.TrackScores = new List<Plot>();

    // ################################## FINSIHED SET-UP
    // ################################## NOW GET THE AMPLITUDE SPECTORGRAMS

    // EXTRACT ENVELOPE and SPECTROGRAM FROM SUBSEGMENT
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(subsegmentRecording, frameSize, frameStep);

    // Select band according to min and max bandwidth
    int minBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MinBandWidth);
    int maxBand = (int)(dspOutput1.AmplitudeSpectrogram.GetLength(1) * config.MaxBandWidth) - 1;
    dspOutput1.AmplitudeSpectrogram = MatrixTools.Submatrix(
        dspOutput1.AmplitudeSpectrogram,
        0,
        minBand,
        dspOutput1.AmplitudeSpectrogram.GetLength(0) - 1,
        maxBand);

    // TODO: Michael to review whether bandwidth filter should be moved to DSP_Frames??
    // Recalculate NyquistBin and FreqBinWidth, because they change with band selection
    //dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1;
    //dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2;

    // Linear or Octave or Mel frequency scale? Set Linear as default.
    var freqScale = new FrequencyScale(nyquist: nyquist, frameSize: frameSize, hertzGridInterval: 1000);
    var freqScaleType = config.FrequencyScale;
    bool octaveScale = freqScaleType == FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
    bool melScale = freqScaleType == FreqScaleType.Mel;
    if (octaveScale)
    {
        // only allow one octave scale at the moment - for Jasco marine recordings.
        // ASSUME fixed Occtave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS
        // If you wish to use other octave scale types then need to put in the config file and and set up recovery here.
        freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);

        // Recalculate the spectrogram according to octave scale. This option works only when have high SR recordings.
        dspOutput1.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra(
            dspOutput1.AmplitudeSpectrogram,
            dspOutput1.WindowPower,
            sampleRate,
            epsilon,
            freqScale);
        dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!!
    }
    else if (melScale)
    {
        int minFreq = 0;
        int maxFreq = recording.Nyquist;
        dspOutput1.AmplitudeSpectrogram = MFCCStuff.MelFilterBank(
            dspOutput1.AmplitudeSpectrogram,
            config.MelScale,
            recording.Nyquist,
            minFreq,
            maxFreq);

        dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1;

        // TODO: This doesn't make any sense, since the frequency width changes for each bin. Probably need to set this to NaN.
        // TODO: Whatever uses this value below, should probably be changed to not be depending on it.
        dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2;
    }

    // NOW EXTRACT SIGNAL FOR BACKGROUND NOISE CALCULATION
    // If the index calculation duration >= 30 seconds, then calculate BGN from the existing segment of recording.
    bool doSeparateBgnNoiseCalculation = indexCalculationDuration.TotalSeconds + (2 * config.BgNoiseBuffer.TotalSeconds) < segmentDuration.TotalSeconds / 2;
    var dspOutput2 = dspOutput1;
    if (doSeparateBgnNoiseCalculation)
    {
        // GET a longer SUBSEGMENT FOR NOISE calculation with 5 sec buffer on either side.
        // If the index calculation duration is shorter than 30 seconds, then need to calculate BGN noise from a longer length of recording
        // i.e. need to add noiseBuffer either side. Typical noiseBuffer value = 5 seconds
        int sampleBuffer = (int)(config.BgNoiseBuffer.TotalSeconds * sampleRate);
        var bgnRecording = AudioRecording.GetRecordingSubsegment(recording, startSample, endSample, sampleBuffer);

        // EXTRACT ENVELOPE and SPECTROGRAM FROM BACKGROUND NOISE SUBSEGMENT
        dspOutput2 = DSP_Frames.ExtractEnvelopeAndFfts(bgnRecording, frameSize, frameStep);

        // If necessary, recalculate the spectrogram according to octave scale. This option works only when have high SR recordings.
        if (octaveScale)
        {
            // ASSUME fixed Occtave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS
            // If you wish to use other octave scale types then need to put in the config file and and set up recovery here.
            dspOutput2.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra(
                dspOutput2.AmplitudeSpectrogram,
                dspOutput2.WindowPower,
                sampleRate,
                epsilon,
                freqScale);
            dspOutput2.NyquistBin = dspOutput2.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!!
        }
    }

    // ###################################### BEGIN CALCULATION OF INDICES ##################################

    // (A) ################################## EXTRACT SUMMARY INDICES FROM THE SIGNAL WAVEFORM ##################################
    // average absolute value over the minute recording - not useful
    // double[] avAbsolute = dspOutput1.Average;
    double[] signalEnvelope = dspOutput1.Envelope;
    double avgSignalEnvelope = signalEnvelope.Average();

    // 10 times log of amplitude squared
    summaryIndices.AvgSignalAmplitude = 20 * Math.Log10(avgSignalEnvelope);

    // Deal with case where the signal waveform is continuous flat with values < 0.001. Has happened!!
    // Although signal appears zero, this condition is required.
    if (avgSignalEnvelope < 0.0001)
    {
        Logger.Debug("Segment skipped because avSignalEnvelope is < 0.001!");
        summaryIndices.ZeroSignal = 1.0;
        return(result);
    }

    // i. Check for clipping and high amplitude rates per second
    summaryIndices.HighAmplitudeIndex = dspOutput1.HighAmplitudeCount / subsegmentDurationInSeconds;
    summaryIndices.ClippingIndex = dspOutput1.ClipCount / subsegmentDurationInSeconds;

    // ii. Calculate bg noise in dB
    //     Convert signal envelope to dB and subtract background noise. Default noise SD to calculate threshold = ZERO
    double signalBgn = NoiseRemovalModal.CalculateBackgroundNoise(dspOutput2.Envelope);
    summaryIndices.BackgroundNoise = signalBgn;

    // iii: FRAME ENERGIES - convert signal to decibels and subtract background noise.
    double[] dBEnvelope = SNR.Signal2Decibels(dspOutput1.Envelope);
    double[] dBEnvelopeSansNoise = SNR.SubtractAndTruncate2Zero(dBEnvelope, signalBgn);

    // iv: ACTIVITY for NOISE REDUCED SIGNAL ENVELOPE
    //     Calculate fraction of frames having acoustic activity
    var activity = ActivityAndCover.CalculateActivity(dBEnvelopeSansNoise, frameStepTimeSpan);
    summaryIndices.Activity = activity.FractionOfActiveFrames;

    // v. average number of events per second whose duration > one frame
    //    average event duration in milliseconds - no longer calculated
    //summaryIndices.AvgEventDuration = activity.avEventDuration;
    summaryIndices.EventsPerSecond = activity.EventCount / subsegmentDurationInSeconds;

    // vi. Calculate SNR and active frames SNR
    summaryIndices.Snr = dBEnvelopeSansNoise.Max();
    summaryIndices.AvgSnrOfActiveFrames = activity.ActiveAvDb;

    // vii. ENTROPY of ENERGY ENVELOPE -- 1-Ht because want measure of concentration of acoustic energy.
    double entropy = DataTools.EntropyNormalised(DataTools.SquareValues(signalEnvelope));
    summaryIndices.TemporalEntropy = 1 - entropy;

    // Note that the spectrogram has had the DC bin removed. i.e. has only 256 columns.
    double[,] amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram; // get amplitude spectrogram.

    // CALCULATE various NDSI (Normalised difference soundscape Index) FROM THE AMPLITUDE SPECTROGRAM
    // These options proved to be highly correlated. Therefore only use tuple.Item 1 which derived from Power Spectral Density.
    var tuple3 = SpectrogramTools.CalculateAvgSpectrumAndVarianceSpectrumFromAmplitudeSpectrogram(amplitudeSpectrogram);
    summaryIndices.Ndsi = SpectrogramTools.CalculateNdsi(tuple3.Item1, sampleRate, 1000, 2000, 8000);

    // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
    // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(subsegmentRecording, frameLength, sampleLength, sensitivity);

    // double length of the vector because want to work with 256 element vector for LDFC purposes
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

    // i: CALCULATE SPECTRUM OF THE SUM OF FREQ BIN AMPLITUDES - used for later calculation of ACI
    spectralIndices.SUM = MatrixTools.SumColumns(amplitudeSpectrogram);

    // Calculate lower and upper boundary bin ids.
    // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic noise. These biased index values away from biophony.
    // Boundary of upper bird-band is to avoid high freq artefacts due to mp3.
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);

    // calculate number of freq bins in the bird-band.
    int midBandBinCount = middleBinBound - lowerBinBound + 1;

    if (octaveScale)
    {
        // the above frequency bin bounds do not apply with octave scale. Need to recalculate them suitable for Octave scale recording.
        lowFreqBound = freqScale.LinearBound;
        lowerBinBound = freqScale.GetBinIdForHerzValue(lowFreqBound);

        midFreqBound = 8000; // This value appears suitable for Jasco Marine recordings. Not much happens above 8kHz.
        //middleBinBound = freqScale.GetBinIdForHerzValue(midFreqBound);
        middleBinBound = freqScale.GetBinIdInReducedSpectrogramForHerzValue(midFreqBound);
        midBandBinCount = middleBinBound - lowerBinBound + 1;
    }

    // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
    // original sample rate can be anything 11.0-44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;

    // if upsampling has been done
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that binwidth does not change
    }

    // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    spectralIndices.DIF = AcousticComplexityIndex.SumOfAmplitudeDifferences(amplitudeSpectrogram);

    double[] aciSpectrum = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);
    spectralIndices.ACI = aciSpectrum;

    // remove low freq band of ACI spectrum and store average ACI value
    double[] reducedAciSpectrum = DataTools.Subarray(aciSpectrum, lowerBinBound, midBandBinCount);
    summaryIndices.AcousticComplexity = reducedAciSpectrum.Average();

    // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
    double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int i = 0; i < temporalEntropySpectrum.Length; i++)
    {
        temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
    }

    spectralIndices.ENT = temporalEntropySpectrum;

    // iv: remove background noise from the amplitude spectrogram
    //     First calculate the noise profile from the amplitude sepctrogram
    double[] spectralAmplitudeBgn = NoiseProfile.CalculateBackgroundNoise(dspOutput2.AmplitudeSpectrogram);
    amplitudeSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(amplitudeSpectrogram, spectralAmplitudeBgn);

    // AMPLITUDE THRESHOLD for smoothing background, nhThreshold, assumes background noise ranges around -40dB.
    // This value corresponds to approximately 6dB above backgorund.
    amplitudeSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(amplitudeSpectrogram, nhThreshold: 0.015);

    ////ImageTools.DrawMatrix(spectrogramData, @"C:\SensorNetworks\WavFiles\Crows\image.png", false);
    ////DataTools.writeBarGraph(modalValues);
    result.AmplitudeSpectrogram = amplitudeSpectrogram;

    // v: ENTROPY OF AVERAGE SPECTRUM & VARIANCE SPECTRUM - at this point the spectrogram is a noise reduced amplitude spectrogram
    var tuple = AcousticEntropy.CalculateSpectralEntropies(amplitudeSpectrogram, lowerBinBound, midBandBinCount);

    // ENTROPY of spectral averages - Reverse the values i.e. calculate 1-Hs and 1-Hv, and 1-Hcov for energy concentration
    summaryIndices.EntropyOfAverageSpectrum = 1 - tuple.Item1;

    // ENTROPY of spectrum of Variance values
    summaryIndices.EntropyOfVarianceSpectrum = 1 - tuple.Item2;

    // ENTROPY of spectrum of Coefficient of Variation values
    summaryIndices.EntropyOfCoVSpectrum = 1 - tuple.Item3;

    // vi: ENTROPY OF DISTRIBUTION of maximum SPECTRAL PEAKS.
    //     First extract High band SPECTROGRAM which is now noise reduced
    double entropyOfPeaksSpectrum = AcousticEntropy.CalculateEntropyOfSpectralPeaks(amplitudeSpectrogram, lowerBinBound, middleBinBound);
    summaryIndices.EntropyOfPeaksSpectrum = 1 - entropyOfPeaksSpectrum;

    // ######################################################################################################################################################
    // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

    // i: Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput2.AmplitudeSpectrogram, dspOutput2.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);
    spectralIndices.BGN = spectralDecibelBgn;

    // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
    //     REUSE the var decibelSpectrogram but this time using dspOutput1.
    deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    deciBelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, spectralDecibelBgn);
    deciBelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(deciBelSpectrogram, nhThreshold: 2.0);

    // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(deciBelSpectrogram);

    // iv: CALCULATE SPECTRAL COVER.
    //     NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
    //     FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb; // dB THRESHOLD for calculating spectral coverage
    var spActivity = ActivityAndCover.CalculateSpectralEvents(deciBelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);
    spectralIndices.CVR = spActivity.CoverSpectrum;
    spectralIndices.EVN = spActivity.EventSpectrum;
    summaryIndices.HighFreqCover = spActivity.HighFreqBandCover;
    summaryIndices.MidFreqCover = spActivity.MidFreqBandCover;
    summaryIndices.LowFreqCover = spActivity.LowFreqBandCover;

    // ######################################################################################################################################################
    // v: CALCULATE SPECTRAL PEAK TRACKS and RIDGE indices.
    //    NOTE: at this point, the var decibelSpectrogram is noise reduced. i.e. all its values >= 0.0
    //    Detecting ridges or spectral peak tracks requires using a 5x5 mask which has edge effects.
    //    This becomes significant if we have a short indexCalculationDuration.
    //    Consequently if the indexCalculationDuration < 10 seconds then we revert back to the recording and cut out a recording segment that includes
    //    a buffer for edge effects. In most cases however, we can just use the decibel spectrogram already calculated and ignore the edge effects.
    double peakThreshold = 6.0; //dB
    SpectralPeakTracks sptInfo;
    if (indexCalculationDuration.TotalSeconds < 10.0)
    {
        // calculate a new decibel spectrogram
        sptInfo = SpectralPeakTracks.CalculateSpectralPeakTracks(recording, startSample, endSample, frameSize, octaveScale, peakThreshold);
    }
    else
    {
        // use existing decibel spectrogram
        sptInfo = new SpectralPeakTracks(deciBelSpectrogram, peakThreshold);
    }

    spectralIndices.SPT = sptInfo.SptSpectrum;
    spectralIndices.RHZ = sptInfo.RhzSpectrum;
    spectralIndices.RVT = sptInfo.RvtSpectrum;
    spectralIndices.RPS = sptInfo.RpsSpectrum;
    spectralIndices.RNG = sptInfo.RngSpectrum;
    summaryIndices.SptDensity = sptInfo.TrackDensity;

    // these are two other indices that I tried but they do not seem to add anything of interest.
    //summaryIndices.AvgSptDuration = sptInfo.AvTrackDuration;
    //summaryIndices.SptPerSecond = sptInfo.TotalTrackCount / subsegmentSecondsDuration;

    // ######################################################################################################################################################
    // vi: CLUSTERING - FIRST DETERMINE IF IT IS WORTH DOING
    //     return if (activeFrameCount too small || eventCount == 0 || short index calc duration) because no point doing clustering
    if (activity.ActiveFrameCount <= 2 || Math.Abs(activity.EventCount) < 0.01 || indexCalculationDuration.TotalSeconds < 15)
    {
        // IN ADDITION return if indexCalculationDuration < 15 seconds because no point doing clustering on short time segment
        // NOTE: Activity was calculated with 3dB threshold AFTER backgroundnoise removal.
        //summaryIndices.AvgClusterDuration = TimeSpan.Zero;
        summaryIndices.ClusterCount = 0;
        summaryIndices.ThreeGramCount = 0;
        return(result);
    }

    // YES WE WILL DO CLUSTERING! to determine cluster count (spectral diversity) and spectral persistence.
    // Only use midband decibel SPECTRUM. In June 2016, the mid-band (i.e. the bird-band) was set to lowerBound=1000Hz, upperBound=8000hz.
    // Actually do clustering of binary spectra. Must first threshold
    double binaryThreshold = SpectralClustering.DefaultBinaryThresholdInDecibels;
    var midBandSpectrogram = MatrixTools.Submatrix(deciBelSpectrogram, 0, lowerBinBound, deciBelSpectrogram.GetLength(0) - 1, middleBinBound);
    var clusterInfo = SpectralClustering.ClusterTheSpectra(midBandSpectrogram, lowerBinBound, middleBinBound, binaryThreshold);

    // Store two summary index values from cluster info
    summaryIndices.ClusterCount = clusterInfo.ClusterCount;
    summaryIndices.ThreeGramCount = clusterInfo.TriGramUniqueCount;

    // As of May 2017, no longer store clustering results superimposed on spectrogram.
    // If you want to see this, then call the TEST methods in class SpectralClustering.cs.

    // #######################################################################################################################################################
    // vii: set up other info to return
    var freqPeaks = SpectralPeakTracks.ConvertSpectralPeaksToNormalisedArray(deciBelSpectrogram);
    var scores = new List<Plot>
    {
        new Plot("Decibels", DataTools.normalise(dBEnvelopeSansNoise), ActivityAndCover.DefaultActivityThresholdDb),
        new Plot("Active Frames", DataTools.Bool2Binary(activity.ActiveFrames), 0.0),
        new Plot("Max Frequency", freqPeaks, 0.0), // relative location of freq maxima in spectra
    };

    result.Hits = sptInfo.Peaks;
    result.TrackScores = scores;

    return(result);
} // end Calculation of Summary and Spectral Indices
/// <summary>
/// Regression test of DSP_Frames.ExtractEnvelopeAndFfts on a known one-minute asset recording.
/// Checks sample rate, duration, frame count, epsilon, window power and the fraction of
/// high-energy frames, then compares the amplitude spectrogram's column sums against a
/// serialized binary fixture.
/// </summary>
public void TestEnvelopeAndFft1()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
    int windowSize = 512;

    // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
    double windowOverlap = 0.0;
    var windowFunction = WindowFunctions.HAMMING.ToString();

    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        windowSize,
        windowOverlap,
        windowFunction);

    // Now recover the data
    // The following data is required when constructing sonograms
    var duration = recording.WavReader.Time;
    var sr = recording.SampleRate;
    var frameCount = fftdata.FrameCount;
    var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;

    // NOTE(review): local is misspelled ("epislon"); harmless because it is method-local.
    var epislon = fftdata.Epsilon;
    var windowPower = fftdata.WindowPower;
    var amplSpectrogram = fftdata.AmplitudeSpectrogram;

    // The below info is only used when calculating spectral and summary indices
    /*
     * // energy level information
     * int clipCount = fftdata.ClipCount;
     * int maxAmpCount = fftdata.MaxAmplitudeCount;
     * double maxSig = fftdata.MaxSignalValue;
     * double minSig = fftdata.MinSignalValue;
     *
     * // envelope info
     * var avArray = fftdata.Average;
     * var envelope = fftdata.Envelope;
     * var frameEnergy = fftdata.FrameEnergy;
     * var frameDecibels = fftdata.FrameDecibels;
     *
     * // freq scale info
     * var nyquistBin = fftdata.NyquistBin;
     * var nyquistFreq = fftdata.NyquistFreq;
     * var freqBinWidth = fftdata.FreqBinWidth;
     */

    // DO THE TESTS
    int expectedSR = 22050;
    Assert.AreEqual(expectedSR, sr);
    Assert.AreEqual("00:01:00.2450000", duration.ToString());
    Assert.AreEqual(2594, frameCount);
    int expectedBitsPerSample = 16;

    // epsilon is the smallest representable sample magnitude: 2^-(bitsPerSample - 1).
    double expectedEpsilon = Math.Pow(0.5, expectedBitsPerSample - 1);
    Assert.AreEqual(expectedEpsilon, epislon);
    double expectedWindowPower = 203.0778;
    Assert.AreEqual(expectedWindowPower, windowPower, 0.0001);
    Assert.AreEqual(0.0, fractionOfHighEnergyFrames, 0.0000001);

    // Test sonogram data matrix by comparing the vector of column sums.
    double[] columnSums = MatrixTools.SumColumns(amplSpectrogram);

    var sumFile = PathHelper.ResolveAsset(@"EnvelopeAndFft\BAC2_20071008-085040_DataColumnSums.bin");

    // uncomment this to update the binary data. Should be rarely needed
    // AT: Updated 2017-02-15 because FFT library changed in 864f7a491e2ea0e938161bd390c1c931ecbdf63c
    //Binary.Serialize(sumFile, columnSums);

    var expectedColSums = Binary.Deserialize<double[]>(sumFile);

    // Aggregate deltas are computed only so the first-element assertion below can emit a
    // diagnostic message summarising how far the whole vector drifted from the fixture.
    var totalDelta = expectedColSums.Zip(columnSums, ValueTuple.Create).Select(x => Math.Abs(x.Item1 - x.Item2)).Sum();
    var avgDelta = expectedColSums.Zip(columnSums, ValueTuple.Create).Select(x => Math.Abs(x.Item1 - x.Item2)).Average();
    Assert.AreEqual(expectedColSums[0], columnSums[0], Delta, $"\nE: {expectedColSums[0]:R}\nA: {columnSums[0]:R}\nD: {expectedColSums[0] - columnSums[0]:R}\nT: {totalDelta:R}\nA: {avgDelta}\nn: {expectedColSums.Length}");
    CollectionAssert.That.AreEqual(expectedColSums, columnSums, Delta);
}
} //Analysis()

/// <summary>
/// Detects harmonic (formant-like) structure in a recording. Builds an amplitude
/// spectrogram, cross-correlates the rows of a 32-bin sub-band above <paramref name="minHz"/>
/// to find periodic spacing (formant gaps), smooths the resulting score array, and converts
/// scores above threshold into acoustic events.
/// </summary>
/// <param name="recording">the recording to be analysed</param>
/// <param name="intensityThreshold">minimum periodicity intensity for a frame to score</param>
/// <param name="minHz">bottom of the frequency band scanned for harmonics</param>
/// <param name="minFormantgap">minimum acceptable formant gap in Hertz</param>
/// <param name="maxFormantgap">maximum acceptable formant gap in Hertz</param>
/// <param name="minDuration">minimum event duration in seconds</param>
/// <param name="windowSize">FFT frame size in samples</param>
/// <param name="windowOverlap">fractional overlap between consecutive frames</param>
/// <param name="segmentStartOffset">offset of this segment from the start of the source recording</param>
/// <returns>
/// Tuple of (sonogram, hits matrix, score array, predicted events).
/// NOTE: the hits element is always null — see comment at the end of this method.
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> DetectHarmonics(
    AudioRecording recording,
    double intensityThreshold,
    int minHz,
    int minFormantgap,
    int maxFormantgap,
    double minDuration,
    int windowSize,
    double windowOverlap,
    TimeSpan segmentStartOffset)
{
    //i: MAKE SONOGRAM
    int numberOfBins = 32; // width (in bins) of the harmonic scan band; must suit the power-of-2 xcorrelation (see note below)
    double binWidth = recording.SampleRate / (double)windowSize;
    int sr = recording.SampleRate;
    double frameDuration = windowSize / (double)sr; // Duration of full frame or window in seconds
    double frameOffset = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
    double framesPerSecond = 1 / frameOffset;

    //double framesPerSecond = sr / (double)windowSize;
    //int frameOffset = (int)(windowSize * (1 - overlap));
    //int frameCount = (length - windowSize + frameOffset) / frameOffset;

    // epsilon = smallest representable sample amplitude given the bit depth
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples, sr, epsilon, windowSize, windowOverlap);
    double[] avAbsolute = results2.Average; //average absolute value over the minute recording
    //double[] envelope = results2.Item2;
    double[,] matrix = results2.AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    double windowPower = results2.WindowPower;

    //window   sr      frameDuration  frames/sec  hz/bin  64frameDuration  hz/64bins  hz/128bins
    // 1024    22050   46.4ms         21.5        21.5    2944ms           1376hz     2752hz
    // 1024    17640   58.0ms         17.2        17.2    3715ms           1100hz     2200hz
    // 2048    17640   116.1ms         8.6         8.6    7430ms            551hz     1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int minBin = (int)Math.Round(minHz / binWidth);
    int maxHz = (int)Math.Round(minHz + (numberOfBins * binWidth));

    int rowCount = matrix.GetLength(0);
    int colCount = matrix.GetLength(1);
    int maxbin = minBin + numberOfBins;

    // minBin + 1 skips the DC column noted above; Submatrix bounds are inclusive.
    double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1;   //an array of periodicity scores
    double[] periodicity = results.Item2; // periodicity (bar spacing) per frame, in bins

    //transfer periodicity info to a hits matrix.
    //intensity = DataTools.filterMovingAverage(intensity, 3);
    double[] scoreArray = new double[intensity.Length];
    var hits = new double[rowCount, colCount];
    for (int r = 0; r < rowCount; r++)
    {
        // period expressed as a fraction of the scanned band (for visualisation in the hits matrix)
        double relativePeriod = periodicity[r] / numberOfBins / 2;
        if (intensity[r] > intensityThreshold)
        {
            for (int c = minBin; c < maxbin; c++)
            {
                hits[r, c] = relativePeriod;
            }
        }

        // only frames whose formant gap (period in Hertz) lies inside the requested range score
        double herzPeriod = periodicity[r] * binWidth;
        if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
        {
            scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
        }
    }

    scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxDuration = 100000.0; //abitrary long number - do not want to restrict duration of machine noise
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        framesPerSecond,
        binWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // NOTE(review): the hits matrix is discarded here, so the returned tuple's hits element
    // is always null. This appears deliberate (memory saving?) — confirm with callers before changing.
    hits = null;

    //set up the songogram to return. Use the existing amplitude sonogram
    int bitsPerSample = recording.WavReader.BitsPerSample;
    TimeSpan duration = recording.Duration;
    NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
    var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
        recording.BaseName, windowSize, windowOverlap, bitsPerSample, windowPower, sr, duration, nrt, matrix);

    sonogram.DecibelsNormalised = new double[rowCount];

    //foreach frame or time step: convert average absolute amplitude to (un-normalised) decibels
    for (int i = 0; i < rowCount; i++)
    {
        sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
    }

    sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
    return (Tuple.Create(sonogram, hits, scoreArray, predictedEvents));
} //end Execute_HDDetect
/// <summary>
/// Calculate summary statistics for supplied temporal and spectral targets.
/// </summary>
/// <remarks>
/// The acoustic statistics calculated in this method are based on methods outlined in
/// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
/// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
/// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
/// ..
/// The Briggs feature are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
/// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count.
///    We use entropy instead because value not dependent on row or column count because it is normalized.
/// For the following measures of k-central moments, the freq and time values are normalized in 0,1 to width of the event.
/// 2. freq-mean
/// 3. freq-variance
/// 4. freq-skew and kurtosis
/// 5. time-mean
/// 6. time-variance
/// 7. time-skew and kurtosis
/// 8. freq-max (normalized)
/// 9. time-max (normalized)
/// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
/// ...
/// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels
/// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
/// NOTE 2: To NormaliseMatrixValues for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
/// There are advantages and disadvantages to Briggs method versus ours. In our case, we have to convert decibel values back to
/// energy values when calculating the statistics for the extracted acoustic event.
/// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
/// Only mean and standard deviation.
/// ..
/// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
/// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
/// BUT not so long as to cause memory constipation.
/// </remarks>
/// <param name="recording">as type AudioRecording which contains the event</param>
/// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording</param>
/// <param name="spectralTarget">both bottom and top bounds in Hertz</param>
/// <param name="config">parameters that determine the outcome of the analysis</param>
/// <param name="segmentStartOffset">How long since the start of the recording this event occurred</param>
/// <returns>an instance of EventStatistics</returns>
public static EventStatistics AnalyzeAudioEvent(
    AudioRecording recording,
    Range<TimeSpan> temporalTarget,
    Range<double> spectralTarget,
    EventStatisticsConfiguration config,
    TimeSpan segmentStartOffset)
{
    var stats = new EventStatistics
    {
        EventStartSeconds = temporalTarget.Minimum.TotalSeconds,
        EventEndSeconds = temporalTarget.Maximum.TotalSeconds,
        LowFrequencyHertz = spectralTarget.Minimum,
        HighFrequencyHertz = spectralTarget.Maximum,
        SegmentDurationSeconds = recording.Duration.TotalSeconds,
        SegmentStartSeconds = segmentStartOffset.TotalSeconds,
    };

    // temporal target is supplied relative to recording, but not the supplied audio segment
    // shift coordinates relative to segment
    var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

    // fail early (with an error recorded on the stats object rather than an exception)
    // when the event does not lie wholly within the supplied audio
    if (!recording
        .Duration
        .AsRangeFromZero(Topology.Inclusive)
        .Contains(localTemporalTarget))
    {
        stats.Error = true;
        stats.ErrorMessage = $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";
        return (stats);
    }

    // convert recording to spectrogram
    int sampleRate = recording.SampleRate;
    double epsilon = recording.Epsilon;

    // extract the spectrogram
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

    double hertzBinWidth = dspOutput1.FreqBinWidth;
    var stepDurationInSeconds = config.FrameStep / (double)sampleRate;
    var startFrame = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

    // subtract 1 frame because want to end before start of end point.
    var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

    var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
    var topBin = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

    // Events can have their high value set to the nyquist.
    // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in
    // these cases. So we just ask for the bin below.
    if (topBin >= config.FrameSize / 2)
    {
        topBin = (config.FrameSize / 2) - 1;
    }

    // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // extract the required acoustic event
    var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

    // Get the SNR of the event. This is just the max value in the matrix because noise reduced
    MatrixTools.MinMax(eventMatrix, out _, out double max);
    stats.SnrDecibels = max;

    // Now need to convert event matrix back to energy values before calculating other statistics
    eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

    var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
    var rowAverages = MatrixTools.GetRowAverages(eventMatrix);

    // calculate the mean and temporal standard deviation in decibels
    NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
    stats.MeanDecibels = 10 * Math.Log10(mean);
    stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate the frequency standard deviation in decibels
    NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
    stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate relative location of the temporal maximum
    int maxRowId = DataTools.GetMaxIndex(rowAverages);
    stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

    // calculate the entropy dispersion/concentration indices
    stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
    stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

    // calculate the spectral centroid and the dominant frequency
    double binCentroid = CalculateSpectralCentroid(columnAverages);
    stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
    int maxColumnId = DataTools.GetMaxIndex(columnAverages);
    stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

    // remainder of this method is to produce debugging images. Can comment out when not debugging.
    /*
     * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
     * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
     * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
     * image4.Save(path4);
     * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
     * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
     * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
     * image4.Save(path4);
     */
    return (stats);
}
/// <summary>
/// Calculates rain/cicada acoustic indices for the supplied audio file.
/// Reads frame-size/overlap and frequency-bound parameters from the analysis config,
/// extracts the signal envelope and amplitude spectrogram, and delegates the index
/// calculation to RainIndices.GetIndices.
/// </summary>
/// <param name="fiAudioFile">the audio file to analyse</param>
/// <param name="analysisSettings">settings whose ConfigDict supplies optional frame/frequency parameters</param>
/// <param name="originalFile">metadata of the source file; its SampleRate is used to detect up-sampling</param>
/// <returns>Tuple of (indices dictionary, audio duration).</returns>
public static Tuple<Dictionary<string, double>, TimeSpan> RainAnalyser(FileInfo fiAudioFile, AnalysisSettings analysisSettings, SourceMetadata originalFile)
{
    Dictionary<string, string> config = analysisSettings.ConfigDict;

    // get parameters for the analysis (defaults below; overridden by config entries when present)
    int frameSize = IndexCalculateConfig.DefaultWindowSize;
    double windowOverlap = 0.0;
    int lowFreqBound = 1000;
    int midFreqBound = 8000;
    if (config.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, config);
    }

    if (config.ContainsKey(key_LOW_FREQ_BOUND))
    {
        lowFreqBound = ConfigDictionary.GetInt(key_LOW_FREQ_BOUND, config);
    }

    if (config.ContainsKey(key_MID_FREQ_BOUND))
    {
        midFreqBound = ConfigDictionary.GetInt(key_MID_FREQ_BOUND, config);
    }

    if (config.ContainsKey(AnalysisKeys.FrameOverlap))
    {
        windowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, config);
    }

    // get recording segment
    AudioRecording recording = new AudioRecording(fiAudioFile.FullName);

    // calculate duration/size of various quantities.
    int signalLength = recording.WavReader.Samples.Length;
    TimeSpan audioDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    double duration = frameSize * (1 - windowOverlap) / (double)recording.SampleRate; // frame step in seconds
    TimeSpan frameDuration = TimeSpan.FromTicks((long)(duration * TimeSpan.TicksPerSecond));
    int chunkDuration = 10; //seconds
    double framesPerSecond = 1 / frameDuration.TotalSeconds;
    int chunkCount = (int)Math.Round(audioDuration.TotalSeconds / (double)chunkDuration);
    int framesPerChunk = (int)(chunkDuration * framesPerSecond);
    string[] classifications = new string[chunkCount];

    //i: EXTRACT ENVELOPE and FFTs
    // epsilon = smallest representable sample amplitude given the bit depth
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var signalextract = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, epsilon, frameSize, windowOverlap);
    double[] envelope = signalextract.Envelope;
    double[,] spectrogram = signalextract.AmplitudeSpectrogram; //amplitude spectrogram
    int colCount = spectrogram.GetLength(1);

    int nyquistFreq = recording.Nyquist;
    int nyquistBin = spectrogram.GetLength(1) - 1;
    double binWidth = nyquistFreq / (double)spectrogram.GetLength(1);

    // calculate the bin id of boundary between mid and low frequency spectrum
    int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);

    // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than 17640/2.
    int originalAudioNyquist = originalFile.SampleRate / 2; // original sample rate can be anything 11.0-44.1 kHz.
    if (recording.Nyquist > originalAudioNyquist)
    {
        nyquistFreq = originalAudioNyquist;
        nyquistBin = (int)Math.Floor(originalAudioNyquist / binWidth);
    }

    // vi: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    var subBandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, nyquistBin);
    double[] aciArray = AcousticComplexityIndex.CalculateACI(subBandSpectrogram);
    double aci1 = aciArray.Average(); // NOTE(review): computed but unused below — possibly leftover from an earlier version

    // ii: FRAME ENERGIES -
    // convert signal to decibels and subtract background noise.
    double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
    var results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signalextract.Envelope), StandardDeviationCount);
    var dBarray = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

    //// vii: remove background noise from the full spectrogram i.e. BIN 1 to Nyquist
    //spectrogramData = MatrixTools.Submatrix(spectrogramData, 0, 1, spectrogramData.GetLength(0) - 1, nyquistBin);
    //const double SpectralBgThreshold = 0.015; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
    //double[] modalValues = SNR.CalculateModalValues(spectrogramData); // calculate modal value for each freq bin.
    //modalValues = DataTools.filterMovingAverage(modalValues, 7); // smooth the modal profile
    //spectrogramData = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogramData, modalValues);
    //spectrogramData = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogramData, SpectralBgThreshold);

    //set up the output
    if (Verbose)
    {
        // BUG FIX: the format string previously had only 12 placeholders ({0}..{11}) for the
        // 14 arguments, which silently dropped the "index1" and "index2" column headers.
        LoggedConsole.WriteLine(
            "{0:d2}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}",
            "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
    }

    StringBuilder sb = null;
    if (WriteOutputFile)
    {
        // BUG FIX: same placeholder/argument mismatch as above.
        string header = string.Format(
            "{0:d2},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13}",
            "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
        sb = new StringBuilder(header + "\n");
    }

    Dictionary<string, double> dict = RainIndices.GetIndices(envelope, audioDuration, frameDuration, spectrogram, lowFreqBound, midFreqBound, binWidth);
    return Tuple.Create(dict, audioDuration);
} //Analysis()
/// <summary>
/// Gets the duration, in seconds, of the non-overlapped part of a frame —
/// i.e. the time between the starts of consecutive frames.
/// Assumes the window size and overlap fraction have already been set on this instance.
/// </summary>
/// <param name="sampleRate">sample rate of the recording in samples per second</param>
/// <returns>the frame step expressed in seconds</returns>
public double GetFrameOffset(int sampleRate)
{
    // Frame step in samples, converted to seconds by dividing by the sample rate.
    var stepInSamples = DSP_Frames.FrameStep(this.WindowSize, this.WindowOverlap);
    return stepInSamples / (double)sampleRate;
}
// Recognizes frog "ribbit" calls by combining three per-frame scores:
// fuzzy frequency membership (from zero-crossings), time-standard-deviation probability,
// and a normalised oscillation (DCT) score. The three are multiplied into a single
// combined score array.
// NOTE(review): the method's return type and access modifiers are declared immediately
// above this excerpt. Returns Tuple.Create(combinedScores, filteredRecording, dBarray, tsd).
// NOTE(review): windowDuration/frameStep appear to be in milliseconds (windowSize is
// derived via sr / 1000 and framesPerSecond via 1000 / frameStep) — TODO confirm.
FrogRibbitRecognizer(AudioRecording recording, string filterName, int midBandFreq, double windowDuration = 5.0, double windowOverlap = 0.5,
    double dctDuration = 0.5, double dctThreshold = 0.4, bool normaliseDCT = false, int minOscilRate = 11, int maxOscilRate = 17, double maxOscilScore = 20.0)
{
    int sr = recording.SampleRate;
    int windowSize = (int)(windowDuration * sr / 1000.0); // frame length in samples
    double frameStep = windowDuration * (1 - windowOverlap);
    double framesPerSecond = 1000 / frameStep;

    //i: Apply filter
    Log.WriteLine("# Filter: " + filterName);
    var filteredRecording = AudioRecording.Filter_IIR(recording, filterName); //return new filtered audio recording.
    int signalLength = filteredRecording.WavReader.Samples.Length;

    //ii: FRAMING
    int[,] frameIDs = DSP_Frames.FrameStartEnds(signalLength, windowSize, windowOverlap);
    int frameCount = frameIDs.GetLength(0);

    //iii: EXTRACT ENVELOPE and ZERO-CROSSINGS
    Log.WriteLine("# Extract Envelope and Zero-crossings.");
    var results2 = DSP_Frames.ExtractEnvelopeAndZeroCrossings(filteredRecording.WavReader.Samples, sr, windowSize, windowOverlap);
    //double[] average = results2.Item1;
    double[] envelope = results2.Item2;
    double[] zeroCrossings = results2.Item3;
    //double[] sampleZCs = results2.Item4;
    double[] sampleStd = results2.Item5;

    Log.WriteLine("# Normalize values.");

    //iv: FRAME ENERGIES - convert envelope to decibels and remove background noise
    double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
    var results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(envelope), StandardDeviationCount);
    var dBarray = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

    //v: CONVERSIONS: ZERO CROSSINGS to herz - then NORMALIZE to Fuzzy freq
    int[] freq = DSP_Frames.ConvertZeroCrossings2Hz(zeroCrossings, windowSize, sr);
    int sideBand = (int)(midBandFreq * 0.1); // fuzzy membership band = ±10% of the mid-band frequency
    var fuzzyFreq = FuzzyFrequency(freq, midBandFreq, sideBand);

    //vi: CONVERSIONS: convert sample std deviations to milliseconds - then NORMALIZE to PROBs
    double[] tsd = DSP_Frames.ConvertSamples2Milliseconds(sampleStd, sr); //time standard deviation
    //for (int i = 0; i < tsd.Length; i++) if (tsd[i]) LoggedConsole.WriteLine(i + " = " + tsd[i]);

    //filter the freq array to remove values derived from frames with high standard deviation
    double[] tsdScores = NormalDist.Values2Probabilities(tsd);

    //vii: GET OSCILLATION SCORE AND NORMALIZE
    double[] rawOscillations = Oscillations2010.DetectOscillationsInScoreArray(dBarray, dctDuration, framesPerSecond, dctThreshold, normaliseDCT, minOscilRate, maxOscilRate);

    //NormaliseMatrixValues oscillation scores wrt scores obtained on a training.
    //double maxOscillationScore = rawOscillations[DataTools.GetMaxIndex(rawOscillations)];
    //LoggedConsole.WriteLine("maxOscillationScore=" + maxOscillationScore);
    // Normalise by maxOscilScore (a value presumably obtained from training data) and clip to [0, 1].
    var oscillations = new double[dBarray.Length];
    for (int i = 0; i < dBarray.Length; i++)
    {
        oscillations[i] = rawOscillations[i] / maxOscilScore;
        if (oscillations[i] > 1.0)
        {
            oscillations[i] = 1.0;
        }
    }

    //viii: COMBINE the SCORES (multiplicative — all three criteria must hold for a high score)
    Log.WriteLine("# Combine Scores.");
    var combinedScores = new double[dBarray.Length];
    for (int i = 0; i < dBarray.Length; i++)
    {
        combinedScores[i] = fuzzyFreq[i] * tsdScores[i] * oscillations[i];
    }

    //ix: fill in the oscillation scores
    combinedScores = Oscillations2010.FillScoreArray(combinedScores, dctDuration, framesPerSecond);
    return (Tuple.Create(combinedScores, filteredRecording, dBarray, tsd));
}