/// <summary>
/// Sets up the scenario with the recording under test and the microphone it uses.
/// </summary>
public AudioRecordingStopsAfterRecording(AudioRecording anAudioRecording, Microphone aMicrophone)
{
    this.audioRecording = anAudioRecording;
    this.microphone = aMicrophone;
}
/// <summary>
/// Sets up the scenario with the recording under test and the microphone it uses.
/// </summary>
public AudioRecordingSucceedsWithMicrophone(AudioRecording anAudioRecording, Microphone aMicrophone)
{
    this.audioRecording = anAudioRecording;
    this.microphone = aMicrophone;
}
/// <summary>
/// Sets up the scenario with the previous recording, the new recording under test,
/// and the microphone they use.
/// </summary>
public AudioRecordingFailsWithoutMicrophone(AudioRecording aPreviousAudioRecording, AudioRecording aNewAudioRecording, Microphone aMicrophone)
{
    this.previousAudioRecording = aPreviousAudioRecording;
    this.audioRecording = aNewAudioRecording;
    this.microphone = aMicrophone;
}
/// <summary>
/// Saves two debug spectrogram images for this recogniser: one from the sonogram
/// passed in, and one regenerated with a longer (1024-sample) frame and no noise reduction.
/// Runs only when MainEntry.InDEBUG is set; MUST remain false for deployment.
/// </summary>
private void WriteDebugImage(AudioRecording recording, DirectoryInfo outputDirectory, BaseSonogram sonogram, List<AcousticEvent> acousticEvents, List<Plot> plots, double[,] hits)
{
    if (!MainEntry.InDEBUG)
    {
        return;
    }

    var baseName = Path.GetFileNameWithoutExtension(recording.BaseName);

    // First image: the supplied sonogram annotated with events, plots and hits.
    Image firstImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, plots, hits);
    var firstPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(baseName, this.Identifier, "png", "DebugSpectrogram1"));
    firstImage.Save(firstPath.FullName);

    // Second image: regenerate the spectrogram with a longer frame.
    var longFrameConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 1024,
        WindowOverlap = 0,
        NoiseReductionType = NoiseReductionType.None,
        //NoiseReductionType = NoiseReductionType.STANDARD,
        //NoiseReductionParameter = 0.1
    };
    BaseSonogram longFrameSonogram = new SpectrogramStandard(longFrameConfig, recording.WavReader);
    var secondPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(baseName, this.Identifier, "png", "DebugSpectrogram2"));
    Image secondImage = SpectrogramTools.GetSonogramPlusCharts(longFrameSonogram, acousticEvents, plots, null);
    secondImage.Save(secondPath.FullName);
}
public static void Main(Arguments arguments) { // 1. set up the necessary files var sourceRecording = arguments.Source; var configInfo = ConfigFile.Deserialize <SpectrogramGeneratorConfig>(arguments.Config.ToFileInfo()); DirectoryInfo output = arguments.Output; if (!output.Exists) { output.Create(); } //if (arguments.StartOffset.HasValue ^ arguments.EndOffset.HasValue) //{ // throw new InvalidStartOrEndException("If StartOffset or EndOffset is specified, then both must be specified"); //} // set default offsets - only use defaults if not provided in arguments list // var offsetsProvided = arguments.StartOffset.HasValue && arguments.EndOffset.HasValue; //TimeSpan? startOffset; //TimeSpan? endOffset; //if (offsetsProvided) //{ // startOffset = TimeSpan.FromSeconds(arguments.StartOffset.Value); // endOffset = TimeSpan.FromSeconds(arguments.EndOffset.Value); //} const string title = "# MAKE MULTIPLE SONOGRAMS FROM AUDIO RECORDING"; string date = "# DATE AND TIME: " + DateTime.Now; LoggedConsole.WriteLine(title); LoggedConsole.WriteLine(date); LoggedConsole.WriteLine("# Input audio file: " + sourceRecording.Name); // 3: CREATE A TEMPORARY RECORDING int resampleRate = configInfo.GetIntOrNull("ResampleRate") ?? 22050; var tempAudioSegment = AudioRecording.CreateTemporaryAudioFile(sourceRecording, output, resampleRate); // 4: GENERATE SPECTROGRAM images //string sourceName = sourceRecording.FullName; string sourceName = Path.GetFileNameWithoutExtension(sourceRecording.FullName); var result = SpectrogramGenerator.GenerateSpectrogramImages(tempAudioSegment, configInfo, sourceName); // 5: Save the image var outputImageFile = new FileInfo(Path.Combine(output.FullName, sourceName + ".Spectrograms.png")); result.CompositeImage.Save(outputImageFile.FullName); }
/// <summary>
/// Returns the amplitude spectrogram of the recording with modal background noise removed.
/// The frame step equals the frame size, i.e. frames do not overlap.
/// </summary>
public static double[,] GetAmplitudeSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
{
    int frameStep = frameSize;

    // Amplitude spectrogram; the DC column (column zero) is removed by the extractor.
    var envelope = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);

    // Remove background noise from the full amplitude spectrogram.
    const double sdCount = 0.1;
    const double spectralBgThreshold = 0.003; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background

    // NOTE(review): CalculateModalNoiseProfile is documented as assuming a dB spectrogram,
    // but here it receives an amplitude spectrogram - confirm this is intentional.
    var profile = NoiseProfile.CalculateModalNoiseProfile(envelope.AmplitudeSpectrogram, sdCount);
    double[] smoothedNoise = DataTools.filterMovingAverage(profile.NoiseThresholds, 7); // smooth the noise profile
    var amplitudeSpectrogram = SNR.NoiseReduce_Standard(envelope.AmplitudeSpectrogram, smoothedNoise, spectralBgThreshold);
    return amplitudeSpectrogram;
}
/// <summary>
/// Tests binary clustering of spectra on the BAC2 test recording: checks the cluster
/// count, the unique trigram count, and the restored cluster spectrum against a
/// serialized expected vector.
/// FIX: Assert.AreEqual takes (expected, actual) - the arguments were reversed,
/// producing misleading failure messages. Also replaced "\\" string concatenation
/// with Path.Combine.
/// </summary>
public void TestBinaryClusteringOfSpectra()
{
    //string wavFilePath = @"C:\SensorNetworks\WavFiles\TestRecordings\BAC\BAC2_20071008-085040.wav";
    var wavFilePath = PathHelper.ResolveAsset(@"Recordings", "BAC2_20071008-085040.wav");
    var outputDir = PathHelper.ResolveAssetPath("BinaryClustering"); // only use this to write expected output.
    int frameSize = 512;
    var recording = new AudioRecording(wavFilePath); // get recording segment

    // decibel threshold (post noise removal) for deriving the binary spectrogram
    double binaryThreshold = SpectralClustering.DefaultBinaryThresholdInDecibels;
    double[,] spectrogramData = SpectralClustering.GetDecibelSpectrogramNoiseReduced(recording, frameSize);

    // Only the mid-band of the spectrogram is clustered, i.e. the band between lowerBinBound
    // and upperBinBound. In June 2016 the mid-band was set to 1000-8000 Hz because this band
    // contains most bird activity (the "Bird-Band"); this makes the cluster summary indices
    // more reflective of bird call activity.
    int freqBinCount = spectrogramData.GetLength(1);
    double binWidth = recording.Nyquist / (double)freqBinCount;
    int lowerFreqBound = 1000;
    int lowerBinBound = (int)Math.Ceiling(lowerFreqBound / binWidth);
    int upperFreqBound = 8000;
    int upperBinBound = (int)Math.Ceiling(upperFreqBound / binWidth);
    var midBandSpectrogram = MatrixTools.Submatrix(spectrogramData, 0, lowerBinBound, spectrogramData.GetLength(0) - 1, upperBinBound);
    var clusterInfo = SpectralClustering.ClusterTheSpectra(midBandSpectrogram, lowerBinBound, upperBinBound, binaryThreshold);

    // transfer cluster info to spectral index results
    var clusterSpectrum = SpectralClustering.RestoreFullLengthSpectrum(clusterInfo.ClusterSpectrum, freqBinCount, lowerBinBound);

    // test the cluster count - also called spectral diversity in some papers
    Assert.AreEqual(17, clusterInfo.ClusterCount);

    // test the trigram count - another way of thinking about spectral change
    Assert.AreEqual(342, clusterInfo.TriGramUniqueCount);

    // test what used to be the CLS spectral index: sum of the rows of the weight vectors.
    var expectedSpectrumFile = new FileInfo(Path.Combine(outputDir, "clusterSpectrum.bin"));

    // Binary.Serialize(expectedSpectrumFile, clusterSpectrum); // uncomment to regenerate expected output
    var expectedVector = Binary.Deserialize<double[]>(expectedSpectrumFile);
    CollectionAssert.AreEqual(expectedVector, clusterSpectrum);
}
/// <summary>
/// Analyzes one audio segment: generates four spectrogram images and, when data
/// saving is enabled, writes the decibel spectrogram matrix to CSV.
/// This analyzer produces no acoustic events; the images are the point.
/// </summary>
public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
{
    var audioFile = segmentSettings.SegmentAudioFile;
    var recording = new AudioRecording(audioFile.FullName);
    var outputDirectory = segmentSettings.SegmentOutputDirectory;
    var analysisResult = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration);
    Config configuration = ConfigFile.Deserialize(analysisSettings.ConfigFile);
    bool saveCsv = analysisSettings.AnalysisDataSaveBehavior;

    // SoX spectrograms are not supported in IAnalyzer mode; warn rather than fail.
    if (configuration.GetBool(AnalysisKeys.MakeSoxSonogram))
    {
        Log.Warn("SoX spectrogram generation config variable found (and set to true) but is ignored when running as an IAnalyzer");
    }

    // generate spectrogram
    var configurationDictionary = new Dictionary<string, string>(configuration.ToDictionary());
    configurationDictionary[ConfigKeys.Recording.Key_RecordingCallName] = audioFile.FullName;
    configurationDictionary[ConfigKeys.Recording.Key_RecordingFileName] = audioFile.Name;
    var soxImage = new FileInfo(Path.Combine(segmentSettings.SegmentOutputDirectory.FullName, audioFile.Name + ".SOX.png"));

    // NOTE(review): dataOnly receives ShouldSave(...) directly - confirm the flag
    // is not meant to be the inverse (i.e. data-only when images are NOT saved).
    var spectrogramResult = Audio2Sonogram.GenerateFourSpectrogramImages(
        audioFile,
        soxImage,
        configurationDictionary,
        dataOnly: analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResult.Events.Length),
        makeSoxSonogram: false);

    // this analysis produces no results!
    // but we still print images (that is the point)
    if (analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResult.Events.Length))
    {
        Debug.Assert(segmentSettings.SegmentImageFile.Exists);
    }

    if (saveCsv)
    {
        var basename = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
        var spectrogramCsvFile = outputDirectory.CombineFile(basename + ".Spectrogram.csv");
        Csv.WriteMatrixToCsv(spectrogramCsvFile, spectrogramResult.DecibelSpectrogram.Data, TwoDimensionalArray.None);
    }

    return (analysisResult);
}
/// <summary>
/// METHOD TO CHECK IF Octave FREQ SCALE IS WORKING.
/// Check it on MARINE RECORDING from JASCO, SR=64000.
/// 24 BIT JASCO RECORDINGS from GBR must be converted to 16 bit:
/// ffmpeg -i source_file.wav -sample_fmt s16 out_file.wav
/// e.g. ". C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg\ffmpeg.exe" -i "C:\SensorNetworks\WavFiles\MarineRecordings\JascoGBR\AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56.wav" -sample_fmt s16 "C:\SensorNetworks\Output\OctaveFreqScale\JascoeMarineGBR116bit.wav"
/// ffmpeg binaries are in C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg
/// </summary>
public static void TESTMETHOD_OctaveFrequencyScale2()
{
    var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit.wav";
    var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo();
    var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo();
    var outputImagePath = Path.Combine(outputDir.FullName, "JascoMarineGBR1.png");
    var opFileStem = "JascoMarineGBR1";
    var recording = new AudioRecording(recordingPath);

    // octave frequency scale with 125 Hz linear portion, 7 tones/octave, 32 kHz Nyquist
    var fst = FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
    var freqScale = new FrequencyScale(fst);
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // build the amplitude sonogram, then convert it to the decibel octave scale
    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;
    sonogram.Configuration.WindowSize = freqScale.WindowSize;
    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // DO FILE EQUALITY TEST on the grid line locations
    string testName = "test2";
    var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyOctaveScaleTest2.EXPECTED.json"));
    var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyOctaveScaleTest2Results.json"));

    // NOTE(review): the result file has a .json extension but is written with the CSV
    // writer - confirm the EXPECTED file was generated the same way.
    Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations);
    TestTools.FileEqualityTest(testName, resultFile, expectedTestFile);
    LoggedConsole.WriteLine("Completed Octave Frequency Scale " + testName);
    Console.WriteLine("\n\n");
}
/// <summary>
/// Runs oscillation detection (OD) over a recording: builds a spectrogram, detects
/// oscillation events in the band [minHz, maxHz], and returns the sonogram together
/// with the hits matrix, score array, predicted events, segmentation, and analysis time.
/// </summary>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, double[], TimeSpan> Execute_ODDetect(FileInfo wavPath, bool doSegmentation, int minHz, int maxHz, double frameOverlap, double dctDuration, double dctThreshold, int minOscilFreq, int maxOscilFreq, double eventThreshold, double minDuration, double maxDuration)
{
    //i: GET RECORDING
    AudioRecording recording = new AudioRecording(wavPath.FullName);

    //if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
    int sr = recording.SampleRate;

    //ii: MAKE SONOGRAM
    Log.WriteLine("Start sonogram.");
    SonogramConfig sonoConfig = new SonogramConfig(); //default values config
    sonoConfig.WindowOverlap = frameOverlap;
    sonoConfig.SourceFName = recording.BaseName;
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    Log.WriteLine("Signal: Duration={0}, Sample Rate={1}", sonogram.Duration, sr);
    Log.WriteLine("Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f0}%, Offset={3:f1}ms, Frames/s={4:f1}", sonogram.Configuration.WindowSize, sonogram.FrameCount, sonogram.FrameDuration * 1000, sonogram.FrameStep * 1000, sonogram.FramesPerSecond, frameOverlap);

    // number of frequency bins spanned by the search band
    int binCount = (int)(maxHz / sonogram.FBinWidth) - (int)(minHz / sonogram.FBinWidth) + 1;
    Log.WriteIfVerbose("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);
    Log.WriteIfVerbose("DctDuration=" + dctDuration + "sec. (# frames=" + (int)Math.Round(dctDuration * sonogram.FramesPerSecond) + ")");
    Log.WriteIfVerbose("Score threshold for oscil events=" + eventThreshold);
    Log.WriteLine("Start OD event detection");

    //iii: DETECT OSCILLATIONS
    bool normaliseDCT = true;
    List<AcousticEvent> predictedEvents; //predefinition of results event list
    double[] scores;                     //predefinition of score array
    double[,] hits;                      //predefinition of hits matrix - to superimpose on sonogram image
    double[] segments;                   //predefinition of segmentation of recording
    TimeSpan analysisTime;               //predefinition of time duration taken to do analysis on this file
    Oscillations2010.Execute((SpectrogramStandard)sonogram, doSegmentation, minHz, maxHz, dctDuration, dctThreshold, normaliseDCT, minOscilFreq, maxOscilFreq, eventThreshold, minDuration, maxDuration, out scores, out predictedEvents, out hits, out segments, out analysisTime);

    return (Tuple.Create(sonogram, hits, scores, predictedEvents, segments, analysisTime));
} //end CaneToadRecogniser
/// <summary>
/// This method is called once per segment (typically one-minute segments).
/// Delegates detection to the generic recognizer and then post-processes the events.
/// </summary>
/// <param name="audioRecording">one minute of audio recording.</param>
/// <param name="config">config file that contains parameters used by all profiles.</param>
/// <param name="segmentStartOffset">when recording starts.</param>
/// <param name="getSpectralIndexes">lazily-computed spectral indices.</param>
/// <param name="outputDirectory">where the recognizer results can be found.</param>
/// <param name="imageWidth">width of the output image, if any.</param>
/// <returns>recognizer results.</returns>
public override RecognizerResults Recognize(
    AudioRecording audioRecording,
    Config config,
    TimeSpan segmentStartOffset,
    Lazy<IndexCalculateResult[]> getSpectralIndexes,
    DirectoryInfo outputDirectory,
    int? imageWidth)
{
    // class KoalaConfig is defined at the bottom of this file.
    var koalaConfig = (KoalaConfig)config;

    var recognizer = new GenericRecognizer();
    RecognizerResults results = recognizer.Recognize(
        audioRecording,
        koalaConfig,
        segmentStartOffset,
        getSpectralIndexes,
        outputDirectory,
        imageWidth);

    // POST-PROCESSING of EVENTS.
    // Combine overlapping events: a low dB threshold can produce many small events.
    var asSpectralEvents = results.NewEvents.Select(ev => (SpectralEvent)ev).ToList();
    var processedEvents = CompositeEvent.CombineOverlappingEvents(asSpectralEvents.Cast<EventCommon>().ToList());

    if (koalaConfig.CombinePossibleSyllableSequence)
    {
        // Combine syllables that are close in time and frequency into single events.
        var candidates = processedEvents.Cast<SpectralEvent>().ToList();
        var maxStartGap = TimeSpan.FromSeconds(koalaConfig.SyllableStartDifference);
        var maxHertzGap = (int)koalaConfig.SyllableHertzGap;
        processedEvents = CompositeEvent.CombineSimilarProximalEvents(candidates, maxStartGap, maxHertzGap);
    }

    results.NewEvents = processedEvents;

    // For a special debug spectrogram (with plots), call GenericRecognizer.SaveDebugSpectrogram here.
    // Standard spectrograms are produced by setting SaveSonogramImages: "True" or
    // "WhenEventsDetected" in the config file.
    return results;
}
/// <summary>
/// Per-test initialization: loads the BAC2 test recording and prepares a linear
/// frequency scale together with a matching spectrogram configuration.
/// </summary>
public void Setup()
{
    this.outputDirectory = PathHelper.GetTempDir();
    this.recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));

    // specified linear scale
    this.freqScale = new FrequencyScale(nyquist: 11025, frameSize: 1024, hertzGridInterval: 1000);

    // the config shared by each spectrogram built in the tests
    this.sonoConfig = new SonogramConfig
    {
        WindowSize = this.freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = this.recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };
}
/// <summary>
/// Analyzes one audio segment: generates spectrogram images and saves the composite
/// image. This analyzer produces no events; the images are the point.
/// FIX: removed the large blocks of dead, commented-out offset/CSV code.
/// </summary>
public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
{
    var audioFile = segmentSettings.SegmentAudioFile;
    var recording = new AudioRecording(audioFile.FullName);
    var sourceRecordingName = recording.BaseName;

    // TODO: get the start and end-time offsets for accurate labeling of the time scale.
    var analysisResult = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration);
    var configInfo = ConfigFile.Deserialize<AnalyzerConfig>(analysisSettings.ConfigFile);
    var spectrogramResult = Audio2Sonogram.GenerateSpectrogramImages(audioFile, configInfo, sourceRecordingName);

    // This analysis produces no results, but we still save images (that is the point).
    spectrogramResult.CompositeImage.Save(segmentSettings.SegmentImageFile.FullName, ImageFormat.Png);

    return analysisResult;
}
/// <summary>
/// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class.
/// FIX: the original assigned Attributes.Duration twice; the duplicate assignment
/// has been removed.
/// </summary>
public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
{
    this.Configuration = config;
    this.Attributes = new SpectrogramAttributes();

    // Reject signals too short to produce a useful sonogram.
    double minDuration = 1.0;
    if (wav.Time.TotalSeconds < minDuration)
    {
        LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
        return;
    }

    // set attributes for the current recording and spectrogram type
    this.Attributes.SampleRate = wav.SampleRate;
    this.Attributes.Duration = wav.Time;
    this.Attributes.NyquistFrequency = wav.SampleRate / 2;
    this.Attributes.MaxAmplitude = wav.CalculateMaximumAmplitude();
    this.Attributes.FrameDuration = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate);

    var recording = new AudioRecording(wav);
    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        config.WindowSize,
        config.WindowOverlap,
        this.Configuration.WindowFunction);

    // now recover required data
    // epsilon is a signal-dependent minimum amplitude value to prevent a possible subsequent log of zero.
    this.Attributes.Epsilon = fftdata.Epsilon;
    this.Attributes.WindowPower = fftdata.WindowPower;
    this.Attributes.FrameCount = fftdata.FrameCount;
    this.Data = fftdata.AmplitudeSpectrogram;

    // IF REQUIRED CONVERT TO MEL SCALE
    if (this.Configuration.DoMelScale)
    {
        // this mel scale conversion uses the "Greg integral" !
        this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency);
    }
}
/// <summary>
/// Tests PCA whitening with default settings: the reverted spectrogram (second
/// output of the whitening) must have the same dimensions as the input matrix.
/// </summary>
public void PcaWhiteningDefault()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var fst = FreqScaleType.Linear;
    var freqScale = new FrequencyScale(fst);
    var recording = new AudioRecording(recordingPath);
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // amplitude spectrogram, RMS-normalized, then converted to a decibel spectrogram
    var amplitudeSonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    amplitudeSonogram.Configuration.WindowSize = freqScale.WindowSize;
    amplitudeSonogram.Data = SNR.RmsNormalization(amplitudeSonogram.Data);
    var sonogram = new SpectrogramStandard(amplitudeSonogram);

    // noise reduction followed by PCA whitening
    sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);
    var whitenedSpectrogram = PcaWhitening.Whitening(sonogram.Data);

    // the reverted spectrogram must match the dimensions of the input matrix
    Assert.AreEqual(whitenedSpectrogram.Reversion.GetLength(0), sonogram.Data.GetLength(0));
    Assert.AreEqual(whitenedSpectrogram.Reversion.GetLength(1), sonogram.Data.GetLength(1));
}
/// <summary>
/// Tests the standard noise-removal profile (the BGN spectral index) against a
/// serialized expected vector.
/// FIX: replaced "\\" string concatenation with Path.Combine.
/// </summary>
public void TestStandardNoiseRemoval()
{
    var recording = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
    int windowSize = 512;
    var sr = recording.SampleRate;

    // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
    double windowOverlap = 0.0;
    var windowFunction = WindowFunctions.HAMMING.ToString();
    var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
        recording,
        windowSize,
        windowOverlap,
        windowFunction);

    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(fftdata.AmplitudeSpectrogram, fftdata.WindowPower, sr, fftdata.Epsilon);

    // NoiseProfile.CalculateBackgroundNoise returns the noise profile used as the BGN
    // spectral index: the modal background noise of each frequency bin, smoothed
    // (defaults: number of SDs = 0, smoothing window = 7). The method assumes the
    // spectrogram is oriented rows=frames, cols=freq bins.
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);

    var resourcesDir = PathHelper.ResolveAssetPath("Indices");
    var expectedSpectrumFile = new FileInfo(Path.Combine(resourcesDir, "NoiseProfile.bin"));

    //Binary.Serialize(expectedSpectrumFile, spectralDecibelBgn); // uncomment to regenerate expected output
    var expectedVector = Binary.Deserialize<double[]>(expectedSpectrumFile);
    CollectionAssert.That.AreEqual(expectedVector, spectralDecibelBgn, 0.000_000_001);
}
} // LocalPeaks()

/// <summary>
/// CALCULATEs SPECTRAL PEAK TRACKS: spectralIndices.SPT, RHZ, RVT, RPS, RNG.
/// This method is only called from IndexCalulate.analysis() when the IndexCalculation Duration is less than 10 seconds,
/// because need to recalculate background noise etc.
/// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
/// NOTE: We require a noise reduced decibel spectrogram.
/// FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth.
/// </summary>
public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
{
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    // 2 because must allow for edge effects when using 5x5 grid to find ridges.
    int bufferFrameCount = 2;
    int ridgeBuffer = frameSize * bufferFrameCount;
    var ridgeRecording = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
    int frameStep = frameSize;
    var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);

    // Generate the ridge SUBSEGMENT deciBel spectrogram from the SUBSEGMENT amplitude spectrogram.
    double[,] decibelSpectrogram;
    if (octaveScale)
    {
        var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
        decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale);
    }
    else
    {
        decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);
    }

    // calculate the noise profile and truncate it from the spectrogram
    var spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);

    // SPECTRAL dB THRESHOLD for smoothing background
    double nhDecibelThreshold = 2.0;
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhDecibelThreshold); // thresholds in decibels

    // double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    // TimeSpan frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));
    var sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
    return (sptInfo);
}
/// <summary>
/// Builds an annotated cepstral spectrogram ("cepstrogram") image for the given recording,
/// framed with a title bar and an x-axis time scale.
/// </summary>
public static Image<Rgb24> GetCepstralSpectrogram(
    SonogramConfig sonoConfig,
    AudioRecording recording,
    string sourceRecordingName)
{
    // TODO: at present the noise reduction type must be set to Standard.
    sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
    sonoConfig.NoiseReductionParameter = 3.0;

    var cepgram = new SpectrogramCepstral(sonoConfig, recording.WavReader);
    var image = cepgram.GetImage();

    var title = "CEPSTRO-GRAM " + sourceRecordingName;
    var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(title, image.Width, ImageTags[CepstralSpectrogram]);

    // Frame the image with a time scale along the x-axis.
    var startTime = TimeSpan.Zero;
    var tickInterval = TimeSpan.FromSeconds(1);
    TimeSpan pixelDuration = TimeSpan.FromSeconds(sonoConfig.WindowStep / (double)sonoConfig.SampleRate);
    var labelInterval = TimeSpan.FromSeconds(5);
    image = BaseSonogram.FrameSonogram(image, titleBar, startTime, tickInterval, pixelDuration, labelInterval);
    return image;
}
/// <summary>
/// Detects segmentation events in a recording within the band [minHz, maxHz].
/// </summary>
/// <param name="wavPath">path of the wav file to analyse.</param>
/// <param name="minHz">lower frequency bound of the search band.</param>
/// <param name="maxHz">upper frequency bound of the search band.</param>
/// <param name="frameOverlap">spectrogram frame overlap.</param>
/// <param name="smoothWindow">window used for smoothing intensity.</param>
/// <param name="thresholdSD">detection threshold in standard deviations.</param>
/// <param name="minDuration">used for smoothing intensity as well as for removing short events.</param>
/// <param name="maxDuration">maximum duration of a retained event.</param>
/// <returns>the sonogram, the detected events, and the associated detection values.</returns>
public static Tuple<BaseSonogram, List<AcousticEvent>, double, double, double, double[]> Execute_Segmentation(FileInfo wavPath, int minHz, int maxHz, double frameOverlap, double smoothWindow, double thresholdSD, double minDuration, double maxDuration)
{
    // i: get the recording
    var recording = new AudioRecording(wavPath.FullName);

    // ii: make the sonogram
    Log.WriteLine("# Start sonogram.");
    var sonoConfig = new SonogramConfig
    {
        WindowOverlap = frameOverlap,
        SourceFName = recording.BaseName,
    };
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // iii: detect segmentation events
    Log.WriteLine("# Start event detection");
    var segmentation = AcousticEvent.GetSegmentationEvents((SpectrogramStandard)sonogram, TimeSpan.Zero, minHz, maxHz, smoothWindow, thresholdSD, minDuration, maxDuration);
    return Tuple.Create(sonogram, segmentation.Item1, segmentation.Item2, segmentation.Item3, segmentation.Item4, segmentation.Item5);
} //end Execute_Segmentation
/// <summary>
/// Generate a Spectrogram.
/// </summary>
/// <param name="bytes">
/// The bytes.
/// </param>
/// <returns>
/// Spectrogram image.
/// </returns>
/// <exception cref="NotSupportedException"><c>NotSupportedException</c>.</exception>
public Bitmap Spectrogram(byte[] bytes)
{
    // 80 pixels per second is too quick for Silverlight;
    // use 40 pixels per second instead (half - window size of 0).
    var sonogramConfig = new SonogramConfig
    {
        WindowOverlap = 0, // was 0.5 when ppms was 0.08
        WindowSize = 512,
        DoSnr = false, // might save us some time generating spectrograms.
    };

    Bitmap result;
    using (var audiorecording = new AudioRecording(bytes))
    {
        // audiorecording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
        if (audiorecording.SampleRate != 22050)
        {
            var msg = string.Format(
                "Must be able to convert audio to 22050hz. Audio has sample rate of {0}.",
                audiorecording.SampleRate);
            throw new NotSupportedException(msg);
        }

        using (var sonogram = new SpectrogramStandard(sonogramConfig, audiorecording.WavReader))
        using (var img = sonogram.GetImage())
        {
            result = new Bitmap(img);
        }
    }

    return result;
}
/// <summary>
/// Wraps the acoustic-index calculation in a lazily-evaluated, thread-safe callback
/// so the indices are computed at most once, on first access.
/// </summary>
private Lazy<IndexCalculateResult[]> GetLazyIndices<T>(
    AudioRecording recording,
    AnalysisSettings analysisSettings,
    SegmentSettings<T> segmentSettings,
    AcousticIndices.AcousticIndicesConfig acousticConfiguration)
{
    // Deferred computation: runs only when the Lazy value is first requested.
    IndexCalculateResult[] CalculateIndices()
    {
        return AcousticIndices.CalculateIndicesInSubsegments(
            recording,
            segmentSettings.SegmentStartOffset,
            segmentSettings.AnalysisIdealSegmentDuration,
            acousticConfiguration.IndexCalculationDuration.Seconds(),
            acousticConfiguration.IndexProperties,
            segmentSettings.Segment.SourceMetadata.SampleRate,
            acousticConfiguration);
    }

    return new Lazy<IndexCalculateResult[]>(CalculateIndices, LazyThreadSafetyMode.ExecutionAndPublication);
}
/// <summary>
/// Cuts the recording into consecutive subsegments of the desired length and
/// returns them as a list of new recordings (mono, 16-bit, source sample rate).
/// FIX: removed the dead store that initialized the subsegment to the source
/// recording before immediately overwriting it, and replaced the opaque ternary
/// with Math.Max.
/// </summary>
/// <param name="recording">the source recording to cut up.</param>
/// <param name="subsegmentDurationInSeconds">nominal duration of each subsegment.</param>
/// <param name="frameStep">frame step in samples; subsegment length is aligned to whole frames.</param>
/// <returns>list of subsegment recordings.</returns>
public static List<AudioRecording> GetSubsegmentsSamples(AudioRecording recording, double subsegmentDurationInSeconds, double frameStep)
{
    var subsegments = new List<AudioRecording>();
    int sampleRate = recording.WavReader.SampleRate;
    var segmentDuration = recording.WavReader.Time.TotalSeconds;
    int segmentSampleCount = (int)(segmentDuration * sampleRate);
    int subsegmentSampleCount = (int)(subsegmentDurationInSeconds * sampleRate);

    // Align the subsegment length to a whole number of frames, but never shrink it
    // below the requested sample count (same effect as the original ternary).
    double wholeFrames = (int)(subsegmentSampleCount / frameStep);
    int wholeFrameSampleCount = (int)(wholeFrames * frameStep);
    subsegmentSampleCount = Math.Max(subsegmentSampleCount, wholeFrameSampleCount);

    for (int i = 0; i < segmentSampleCount / subsegmentSampleCount; i++)
    {
        double[] subsamples = DataTools.Subarray(recording.WavReader.Samples, i * subsegmentSampleCount, subsegmentSampleCount);
        var reader = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate);
        subsegments.Add(new AudioRecording(reader));
    }

    return subsegments;
}
/// <summary>
/// Starts recording from the selected device into the next wav file.
/// PCM samples arrive on a background thread and are marshalled to the UI thread.
/// </summary>
private void OnRecord(object sender, RoutedEventArgs e)
{
    this.samples = new List<float>();
    this.isRecording = true;
    string filename = GetNextWavFileName();
    try
    {
        this.recording = this.selectedDevice.StartRecording(filename, (s, sample) =>
        {
            // handle PCM data on background thread
            //
            UiDispatcher.RunOnUIThread(new Action(() =>
            {
                if (this.isRecording)
                {
                    CollectSamples(sample);
                }

                if (sample.Error != null)
                {
                    ShowStatus(sample.Error);
                }
                else if (!sample.Closed)
                {
                    // keep pumping no matter what so we don't get any buffering on next recording.
                    this.recording.ReadNextFrame();
                }
            }));
        });
    }
    catch (Exception ex)
    {
        // surface the failure to the user rather than crashing the UI
        ShowStatus(ex.Message);
    }

    SetButtonState();
}
/// <summary>
/// Starts voice control: records an audio clip and analyses the spoken command.
/// (Translated from the original Russian: "Запускает голосовое управление".)
/// </summary>
/// <param name="sender">Event source.</param>
/// <param name="e">Event arguments.</param>
private async void RecordAndAnalyze(object sender, EventArgs e)
{
    try
    {
        // Lock the UI and give haptic + visual (red panel) feedback while recording.
        MainGrid.IsEnabled = false;
        Vibration.Vibrate();
        BottomPanel.BackgroundColor = Color.FromHex("#C00000");
        if (await AudioRecording.CheckAudioPermissions())
        {
            await AudioRecording.RecordAudio();
            await AnalizeCommandHandwritten();
        }
    }
    catch (Exception)
    {
        // Any failure (permission, recording, analysis) routes to a generic error page.
        await Navigation.PushAsync(new SomethingWentWrongPage());
    }
    finally
    {
        // Always restore the UI state, whether the command succeeded or not.
        MainGrid.IsEnabled = true;
        BottomPanel.BackgroundColor = Color.Black;
    }
}
/// <summary>
/// Analyses one audio segment: calculates acoustic indices for each subsegment, optionally
/// writes summary/spectral index files, and optionally saves a spectrogram image and its CSV data.
/// </summary>
/// <typeparam name="T">Type of the segment source.</typeparam>
/// <param name="analysisSettings">Global analysis settings, including the analyzer-specific configuration.</param>
/// <param name="segmentSettings">Settings for this particular segment (audio file, offsets, output paths).</param>
/// <returns>The populated <see cref="AnalysisResult2"/> for this segment.</returns>
public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
{
    var acousticIndicesConfiguration = (AcousticIndicesConfig)analysisSettings.AnalysisAnalyzerSpecificConfiguration;
    var indexCalculationDuration = acousticIndicesConfiguration.IndexCalculationDuration.Seconds();

    var audioFile = segmentSettings.SegmentAudioFile;
    var recording = new AudioRecording(audioFile.FullName);
    var outputDirectory = segmentSettings.SegmentOutputDirectory;

    var analysisResults = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration);
    analysisResults.AnalysisIdentifier = this.Identifier;

    // calculate indices for each subsegment
    IndexCalculateResult[] subsegmentResults = CalculateIndicesInSubsegments(
        recording,
        segmentSettings.SegmentStartOffset,
        segmentSettings.AnalysisIdealSegmentDuration,
        indexCalculationDuration,
        acousticIndicesConfiguration.IndexProperties,
        segmentSettings.Segment.SourceMetadata.SampleRate,
        acousticIndicesConfiguration);

    var trackScores = new List<Plot>(subsegmentResults.Length);
    var tracks = new List<Track>(subsegmentResults.Length);

    analysisResults.SummaryIndices = new SummaryIndexBase[subsegmentResults.Length];
    analysisResults.SpectralIndices = new SpectralIndexBase[subsegmentResults.Length];

    // Copy each subsegment's results into the flat result arrays and accumulate track data.
    for (int i = 0; i < subsegmentResults.Length; i++)
    {
        var indexCalculateResult = subsegmentResults[i];

        // Tag every result row with the source recording's identifier.
        indexCalculateResult.SummaryIndexValues.FileName = segmentSettings.Segment.SourceMetadata.Identifier;
        indexCalculateResult.SpectralIndexValues.FileName = segmentSettings.Segment.SourceMetadata.Identifier;

        analysisResults.SummaryIndices[i] = indexCalculateResult.SummaryIndexValues;
        analysisResults.SpectralIndices[i] = indexCalculateResult.SpectralIndexValues;
        trackScores.AddRange(indexCalculateResult.TrackScores);
        if (indexCalculateResult.Tracks != null)
        {
            tracks.AddRange(indexCalculateResult.Tracks);
        }
    }

    // NOTE(review): the same AnalysisDataSaveBehavior flag is tested twice below
    // (summary then spectral indices) - the two blocks could be merged; confirm no intent behind the split.
    if (analysisSettings.AnalysisDataSaveBehavior)
    {
        this.WriteSummaryIndicesFile(segmentSettings.SegmentSummaryIndicesFile, analysisResults.SummaryIndices);
        analysisResults.SummaryIndicesFile = segmentSettings.SegmentSummaryIndicesFile;
    }

    if (analysisSettings.AnalysisDataSaveBehavior)
    {
        analysisResults.SpectraIndicesFiles =
            WriteSpectrumIndicesFilesCustom(
                segmentSettings.SegmentSpectrumIndicesDirectory,
                Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name),
                analysisResults.SpectralIndices);
    }

    // write the segment spectrogram (typically of one minute duration) to CSV
    // this is required if you want to produced zoomed spectrograms at a resolution greater than 0.2 seconds/pixel
    bool saveSonogramData = analysisSettings.Configuration.GetBoolOrNull(AnalysisKeys.SaveSonogramData) ?? false;
    if (saveSonogramData || analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResults.Events.Length))
    {
        var sonoConfig = new SonogramConfig(); // default values config
        sonoConfig.SourceFName = recording.FilePath;
        sonoConfig.WindowSize = acousticIndicesConfiguration.FrameLength;
        sonoConfig.WindowStep = analysisSettings.Configuration.GetIntOrNull(AnalysisKeys.FrameStep) ?? sonoConfig.WindowSize; // default = no overlap
        sonoConfig.WindowOverlap = (sonoConfig.WindowSize - sonoConfig.WindowStep) / (double)sonoConfig.WindowSize;

        // Linear or Octave frequency scale?
        bool octaveScale = analysisSettings.Configuration.GetBoolOrNull(AnalysisKeys.KeyOctaveFreqScale) ?? false;
        if (octaveScale)
        {
            // For octave scale, force step == size, i.e. zero overlap.
            sonoConfig.WindowStep = sonoConfig.WindowSize;
            sonoConfig.WindowOverlap = (sonoConfig.WindowSize - sonoConfig.WindowStep) / (double)sonoConfig.WindowSize;
        }

        ////sonoConfig.NoiseReductionType = NoiseReductionType.NONE; // the default
        ////sonoConfig.NoiseReductionType = NoiseReductionType.STANDARD;
        var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

        // remove the DC row of the spectrogram
        sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

        if (analysisSettings.AnalysisImageSaveBehavior.ShouldSave())
        {
            string imagePath = Path.Combine(outputDirectory.FullName, segmentSettings.SegmentImageFile.Name);

            // NOTE: hits (SPT in this case) is intentionally not supported
            var image = DrawSonogram(sonogram, null, trackScores, tracks);
            image.Save(imagePath);
            analysisResults.ImageFile = new FileInfo(imagePath);
        }

        if (saveSonogramData)
        {
            string csvPath = Path.Combine(outputDirectory.FullName, recording.BaseName + ".csv");
            Csv.WriteMatrixToCsv(csvPath.ToFileInfo(), sonogram.Data);
        }
    }

    return (analysisResults);
}
/// <summary>
/// Calculates acoustic indices for each subsegment of a (mono) recording segment.
/// The segment is divided into round(segmentDuration / indexCalculationDuration) subsegments
/// and <c>IndexCalculate.Analysis</c> is run once per subsegment.
/// </summary>
/// <param name="recording">The recording segment; MUST be mono.</param>
/// <param name="segmentStartOffset">Start offset of this segment within the source recording.</param>
/// <param name="segmentDuration">Ideal duration of the segment.</param>
/// <param name="indexCalculationDuration">Duration of each subsegment over which one set of indices is calculated.</param>
/// <param name="indexProperties">Properties of the indices to calculate.</param>
/// <param name="sampleRateOfOriginalAudioFile">Sample rate of the original (pre-resampling) audio file.</param>
/// <param name="config">Index calculation configuration.</param>
/// <returns>One <see cref="IndexCalculateResult"/> per subsegment.</returns>
/// <exception cref="InvalidOperationException">Thrown when the recording has more than one channel.</exception>
public static IndexCalculateResult[] CalculateIndicesInSubsegments(
    AudioRecording recording,
    TimeSpan segmentStartOffset,
    TimeSpan segmentDuration,
    TimeSpan indexCalculationDuration,
    Dictionary<string, IndexProperties> indexProperties,
    int sampleRateOfOriginalAudioFile,
    IndexCalculateConfig config)
{
    if (recording.WavReader.Channels > 1)
    {
        throw new InvalidOperationException(
            @"A multi-channel recording MUST be mixed down to MONO before calculating acoustic indices!");
    }

    // Fix: removed the unused locals "recordingDuration" and "audioCuttingError"
    // that were computed but never read.
    double subsegmentDuration = indexCalculationDuration.TotalSeconds;
    double segmentDurationSeconds = segmentDuration.TotalSeconds;

    // using the expected duration, each call to analyze will always produce the same number of results
    // round, we expect perfect numbers, warn if not
    double subsegmentsInSegment = segmentDurationSeconds / subsegmentDuration;
    int subsegmentCount = (int)Math.Round(subsegmentsInSegment);
    const double warningThreshold = 0.01; // 1%
    double fraction = subsegmentsInSegment - subsegmentCount;

    // NOTE(review): after Math.Round, fraction lies in roughly (-0.5, 0.5], so the
    // "added" branch below fires only at the exact .5 boundary - confirm the intended wording.
    if (Math.Abs(fraction) > warningThreshold)
    {
        Log.Warn(
            string.Format(
                "The IndexCalculationDuration ({0}) does not fit well into the provided segment ({1}). This means a partial result has been {3}, {2} results will be calculated",
                subsegmentDuration,
                segmentDurationSeconds,
                subsegmentCount,
                fraction >= 0.5 ? "added" : "removed"));
    }

    Log.Trace(subsegmentCount + " sub segments will be calculated");

    var indexCalculateResults = new IndexCalculateResult[subsegmentCount];

    // calculate indices for each subsegment
    for (int i = 0; i < subsegmentCount; i++)
    {
        var subsegmentOffset = segmentStartOffset + TimeSpan.FromSeconds(i * subsegmentDuration);
        var indexCalculateResult = IndexCalculate.Analysis(
            recording,
            subsegmentOffset,
            indexProperties,
            sampleRateOfOriginalAudioFile,
            segmentStartOffset,
            config);
        indexCalculateResults[i] = indexCalculateResult;
    }

    return indexCalculateResults;
}
/// <summary>
/// THE KEY ANALYSIS METHOD for Limnodynastes convex.
/// Finds candidate calls in the hi-res "RHZ" spectral-index spectrogram (strongest peak in the
/// dominant-frequency band), then confirms each candidate in a standard decibel spectrogram by
/// requiring two further harmonically-spaced peaks (F2, F3) below the dominant peak (F1).
/// Optionally annotates debug images along the way.
/// </summary>
/// <param name="dictionaryOfHiResSpectralIndices">Hi-res spectral index matrices keyed by index name; "RHZ" is used here.</param>
/// <param name="recording">The segment of the source file.</param>
/// <param name="configDict">Config dictionary (EventThreshold, MinHz, DominantFrequency, PeakGap, SpeciesName).</param>
/// <param name="analysisSettings">Analysis settings (unused directly here).</param>
/// <param name="segmentSettings">Segment settings (output directory, start offset, audio file name).</param>
/// <returns>The <see cref="LimnodynastesConvexResults"/> (sonogram, plot, events, duration).</returns>
internal static LimnodynastesConvexResults Analysis(
    Dictionary<string, double[,]> dictionaryOfHiResSpectralIndices,
    AudioRecording recording,
    Dictionary<string, string> configDict,
    AnalysisSettings analysisSettings,
    SegmentSettingsBase segmentSettings)
{
    // for Limnodynastes convex, in the D.Stewart CD, there are peaks close to:
    //1. 1950 Hz
    //2. 1460 hz
    //3. 970 hz These are 490 Hz apart.
    // for Limnodynastes convex, in the JCU recording, there are peaks close to:
    //1. 1780 Hz
    //2. 1330 hz
    //3. 880 hz These are 450 Hz apart.

    // So strategy is to look for three peaks separated by same amount and in the vicinity of the above,
    // starting with highest power (the top peak) and working down to lowest power (bottom peak).

    var outputDir = segmentSettings.SegmentOutputDirectory;
    TimeSpan segmentStartOffset = segmentSettings.SegmentStartOffset;

    //KeyValuePair<string, double[,]> kvp = dictionaryOfHiResSpectralIndices.First();
    var spg = dictionaryOfHiResSpectralIndices["RHZ"];
    int rhzRowCount = spg.GetLength(0);
    int rhzColCount = spg.GetLength(1);
    int sampleRate = recording.SampleRate;
    double herzPerBin = sampleRate / 2 / (double)rhzRowCount;

    // NOTE(review): the "?? default" arms below are dead - double.Parse/int.Parse never
    // return null, so the casts to nullable and the defaults (3.0, 850, 1850, 470) are never used.
    double scoreThreshold = (double?)double.Parse(configDict["EventThreshold"]) ?? 3.0;
    int minimumFrequency = (int?)int.Parse(configDict["MinHz"]) ?? 850;
    int dominantFrequency = (int?)int.Parse(configDict["DominantFrequency"]) ?? 1850;

    // # The Limnodynastes call has three major peaks. The dominant peak is at 1850 or as set above.
    // # The second and third peak are at equal gaps below. DominantFreq-gap and DominantFreq-(2*gap);
    // # Set the gap in the Config file. Should typically be in range 880 to 970
    int peakGapInHerz = (int?)int.Parse(configDict["PeakGap"]) ?? 470;
    int F1AndF2Gap = (int)Math.Round(peakGapInHerz / herzPerBin);
    //int F1AndF2Gap = 10; // 10 = number of freq bins
    int F1AndF3Gap = 2 * F1AndF2Gap;
    //int F1AndF3Gap = 20;

    int hzBuffer = 250;
    int bottomBin = 5;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);; // NOTE(review): stray extra semicolon (empty statement) - harmless
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    // freqBin + rowID = binCount - 1;
    // therefore: rowID = binCount - freqBin - 1;
    int minRowID = rhzRowCount - dominantBinMax - 1;
    int maxRowID = rhzRowCount - dominantBinMin - 1;
    int bottomRow = rhzRowCount - bottomBin - 1;

    var list = new List<Point>();

    // loop through all spectra/columns of the hi-res spectrogram,
    // recording (column, freqBin) of the strongest in-band local peak in each column.
    for (int c = 1; c < rhzColCount - 1; c++)
    {
        double maxAmplitude = -double.MaxValue;
        int idOfRowWithMaxAmplitude = 0;
        for (int r = minRowID; r <= bottomRow; r++)
        {
            if (spg[r, c] > maxAmplitude)
            {
                maxAmplitude = spg[r, c];
                idOfRowWithMaxAmplitude = r;
            }
        }

        // reject maxima outside the dominant-frequency band
        if (idOfRowWithMaxAmplitude < minRowID)
        {
            continue;
        }

        if (idOfRowWithMaxAmplitude > maxRowID)
        {
            continue;
        }

        // want a spectral peak, i.e. a local maximum in time as well.
        if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c - 1])
        {
            continue;
        }

        if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c + 1])
        {
            continue;
        }

        // peak should exceed thresold amplitude (hard-coded 3.0 dB here, not scoreThreshold)
        if (spg[idOfRowWithMaxAmplitude, c] < 3.0)
        {
            continue;
        }

        // convert row ID to freq bin ID
        int freqBinID = rhzRowCount - idOfRowWithMaxAmplitude - 1;
        list.Add(new Point(c, freqBinID));

        // we now have a list of potential hits for LimCon. This needs to be filtered.
        // Console.WriteLine("Col {0}, Bin {1} ", c, freqBinID);
    }

    // DEBUG ONLY
    // ################################ TEMPORARY ################################
    // superimpose point on RHZ HiRes spectrogram for debug purposes
    bool drawOnHiResSpectrogram = true;
    var fileName = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
    string filePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScale.png";
    var debugImage = new FileInfo(filePath);
    if (!debugImage.Exists)
    {
        // silently skip annotation when the pre-rendered image is missing
        drawOnHiResSpectrogram = false;
    }

    if (drawOnHiResSpectrogram)
    {
        // put red dot where max is (70 / 1911 are image-layout offsets for this debug image)
        Bitmap bmp = new Bitmap(filePath);
        foreach (Point point in list)
        {
            bmp.SetPixel(point.X + 70, 1911 - point.Y, Color.Red);
        }

        // mark off every tenth frequency bin
        for (int r = 0; r < 26; r++)
        {
            bmp.SetPixel(68, 1911 - (r * 10), Color.Blue);
            bmp.SetPixel(69, 1911 - (r * 10), Color.Blue);
        }

        // mark off upper bound and lower frequency bound
        bmp.SetPixel(69, 1911 - dominantBinMin, Color.Lime);
        bmp.SetPixel(69, 1911 - dominantBinMax, Color.Lime);
        //bmp.SetPixel(69, 1911 - maxRowID, Color.Lime);
        string opFilePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScaleAnnotated.png";
        bmp.Save(opFilePath);
    }

    // END DEBUG ################################ TEMPORARY ################################

    // now construct the standard decibel spectrogram WITHOUT noise removal, and look for LimConvex
    // get frame parameters for the analysis
    // NOTE(review): "epsilon" below is computed but never used in this method.
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    int frameSize = rhzRowCount * 2;
    int frameStep = frameSize; // this default = zero overlap
    double frameDurationInSeconds = frameSize / (double)sampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // i: Init SONOGRAM config
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.None,
    };

    // init sonogram
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // remove the DC row of the spectrogram
    sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

    // create new list of LimCon hits in the standard spectrogram,
    // refining each hi-res hit by searching a small time/frequency neighbourhood.
    double timeSpanOfFrameInSeconds = frameSize / (double)sampleRate;
    var newList = new List<int[]>();
    int lastFrameID = sonogram.Data.GetLength(0) - 1;
    int lastBinID = sonogram.Data.GetLength(1) - 1; // NOTE(review): unused
    foreach (Point point in list)
    {
        // point.X is a hi-res column index; 0.1 s per column, +0.05 to centre of time-block
        // - assumed from the hard-coded constants; TODO confirm against the index resolution.
        double secondsFromStartOfSegment = (point.X * 0.1) + 0.05;
        int framesFromStartOfSegment = (int)Math.Round(secondsFromStartOfSegment / timeSpanOfFrameInSeconds);

        // location of max point is uncertain, so search in neighbourhood.
        // NOTE: sonogram.data matrix is time*freqBin
        double maxValue = -double.MaxValue;
        int idOfTMax = framesFromStartOfSegment;
        int idOfFMax = point.Y;
        for (int deltaT = -4; deltaT <= 4; deltaT++)
        {
            for (int deltaF = -1; deltaF <= 1; deltaF++)
            {
                // clamp the time index into the matrix; frequency is NOT clamped here.
                int newT = framesFromStartOfSegment + deltaT;
                if (newT < 0)
                {
                    newT = 0;
                }
                else if (newT > lastFrameID)
                {
                    newT = lastFrameID;
                }

                double value = sonogram.Data[newT, point.Y + deltaF];
                if (value > maxValue)
                {
                    maxValue = value;
                    idOfTMax = framesFromStartOfSegment + deltaT;
                    idOfFMax = point.Y + deltaF;
                }
            }
        }

        // store refined (frame, freqBin) pair
        int[] array = new int[2];
        array[0] = idOfTMax;
        array[1] = idOfFMax;
        newList.Add(array);
    }

    // Now obtain more of spectrogram to see if have peaks at two other places characteristic of Limnodynastes convex.
    // In the D.Stewart CD, there are peaks close to:
    //1. 1950 Hz
    //2. 1460 hz
    //3. 970 hz These are 490 Hz apart.
    // For Limnodynastes convex, in the JCU recording, there are peaks close to:
    //1. 1780 Hz
    //2. 1330 hz
    //3. 880 hz These are 450 Hz apart.

    // So strategy is to look for three peaks separated by same amount and in the vicinity of the above,
    // starting with highest power (the top peak) and working down to lowest power (bottom peak).

    //We have found top/highest peak - now find the other two.
    // NOTE(review): secondDominantBin and thirdDominantBin are computed but never used below.
    int secondDominantFrequency = 1380;
    int secondDominantBin = (int)Math.Round(secondDominantFrequency / herzPerBin);
    int thirdDominantFrequency = 900;
    int thirdDominantBin = (int)Math.Round(thirdDominantFrequency / herzPerBin);

    var acousticEvents = new List<AcousticEvent>();
    int Tbuffer = 2;

    // First extract a sub-matrix around each candidate and validate the F2/F3 peaks.
    foreach (int[] array in newList)
    {
        // NOTE: sonogram.data matrix is time*freqBin
        int Tframe = array[0];
        int F1bin = array[1];
        double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - Tbuffer, 0, Tframe + Tbuffer, F1bin);
        double F1power = subMatrix[Tbuffer, F1bin];

        // convert to vector
        var spectrum = MatrixTools.GetColumnAverages(subMatrix);

        // use the following code to get estimate of background noise
        double[,] powerMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - 3, 10, Tframe + 3, F1bin);
        double averagePower = (MatrixTools.GetRowAverages(powerMatrix)).Average();
        double score = F1power - averagePower;

        // locate the peaks in lower frequency bands, F2 and F3
        bool[] peaks = DataTools.GetPeaks(spectrum);
        int F2bin = 0;
        double F2power = -200.0; // dB
        for (int i = -3; i <= 2; i++)
        {
            int bin = F1bin - F1AndF2Gap + i;
            if ((peaks[bin]) && (F2power < subMatrix[1, bin]))
            {
                F2bin = bin;
                F2power = subMatrix[1, bin];
            }
        }

        // no second peak found => reject candidate
        if (F2bin == 0)
        {
            continue;
        }

        if (F2power == -200.0)
        {
            continue;
        }

        score += (F2power - averagePower);

        int F3bin = 0;
        double F3power = -200.0;
        for (int i = -5; i <= 2; i++)
        {
            int bin = F1bin - F1AndF3Gap + i;
            if ((peaks[bin]) && (F3power < subMatrix[1, bin]))
            {
                F3bin = bin;
                F3power = subMatrix[1, bin];
            }
        }

        // no third peak found => reject candidate
        if (F3bin == 0)
        {
            continue;
        }

        if (F3power == -200.0)
        {
            continue;
        }

        score += (F3power - averagePower);
        score /= 3; // average SNR over the three peaks

        // ignore events where SNR < decibel threshold
        if (score < scoreThreshold)
        {
            continue;
        }

        // ignore events with wrong power distribution. A good LimnoConvex call has strongest F1 power
        if ((F3power > F1power) || (F2power > F1power))
        {
            continue;
        }

        //freq Bin ID must be converted back to Matrix row ID
        // freqBin + rowID = binCount - 1;
        // therefore: rowID = binCount - freqBin - 1;
        // NOTE(review): these reassignments of minRowID/maxRowID and the F*RowID locals
        // are not read afterwards in this method.
        minRowID = rhzRowCount - F1bin - 2;
        maxRowID = rhzRowCount - F3bin - 1;
        int F1RowID = rhzRowCount - F1bin - 1;
        int F2RowID = rhzRowCount - F2bin - 1;
        int F3RowID = rhzRowCount - F3bin - 1;

        int maxfreq = dominantFrequency + hzBuffer;
        int topBin = (int)Math.Round(maxfreq / herzPerBin);
        int frameCount = 4;
        double duration = frameCount * frameStepInSeconds;
        double startTimeWrtSegment = (Tframe - 2) * frameStepInSeconds;

        // Got to here so start initialising an acoustic event
        var ae = new AcousticEvent(segmentStartOffset, startTimeWrtSegment, duration, minimumFrequency, maxfreq);
        ae.SetTimeAndFreqScales(framesPerSec, herzPerBin);

        // record the three peak locations relative to the event box
        var pointF1 = new Point(2, topBin - F1bin);
        var pointF2 = new Point(2, topBin - F2bin);
        var pointF3 = new Point(2, topBin - F3bin);
        ae.Points = new List<Point>();
        ae.Points.Add(pointF1);
        ae.Points.Add(pointF2);
        ae.Points.Add(pointF3);

        //tried using HitElements but did not do what I wanted later on.
        //ae.HitElements = new HashSet<Point>();
        //ae.HitElements = new SortedSet<Point>();
        //ae.HitElements.Add(pointF1);
        //ae.HitElements.Add(pointF2);
        //ae.HitElements.Add(pointF3);
        ae.Score = score;
        acousticEvents.Add(ae);
    }

    // now add in extra common info to the acoustic events
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = configDict[AnalysisKeys.SpeciesName];
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recording.Duration.TotalSeconds;
        ae.Name = abbreviatedName;
        ae.BorderColour = Color.Red;
        ae.FileName = recording.BaseName;
    });

    // Build the (hi-res resolution) score track for plotting.
    double[] scores = new double[rhzColCount]; // predefinition of score array
    double nomalisationConstant = scoreThreshold * 4; // four times the score threshold
    double compressionFactor = rhzColCount / (double)sonogram.Data.GetLength(0);
    foreach (AcousticEvent ae in acousticEvents)
    {
        // clip normalised score to [0, 1]
        ae.ScoreNormalised = ae.Score / nomalisationConstant;
        if (ae.ScoreNormalised > 1.0)
        {
            ae.ScoreNormalised = 1.0;
        }

        // map the event's start frame back to a hi-res column
        int frameID = (int)Math.Round(ae.EventStartSeconds / frameDurationInSeconds);
        int hiresFrameID = (int)Math.Floor(frameID * compressionFactor);
        scores[hiresFrameID] = ae.ScoreNormalised;
    }

    var plot = new Plot(AnalysisName, scores, scoreThreshold);

    // DEBUG ONLY ################################ TEMPORARY ################################
    // Draw a standard spectrogram and mark of hites etc.
    bool createStandardDebugSpectrogram = true;

    var imageDir = new DirectoryInfo(outputDir.FullName + @"\SpectrogramImages");
    if (!imageDir.Exists)
    {
        imageDir.Create();
    }

    if (createStandardDebugSpectrogram)
    {
        // NOTE(review): fileName2 is computed but the path below uses the earlier
        // "fileName" variable - looks like fileName2 was meant; confirm.
        var fileName2 = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
        string filePath2 = Path.Combine(imageDir.FullName, fileName + ".Spectrogram.png");
        Bitmap sonoBmp = (Bitmap)sonogram.GetImage();
        int height = sonoBmp.Height;

        foreach (AcousticEvent ae in acousticEvents)
        {
            ae.DrawEvent(sonoBmp);
            ae.DrawPoint(sonoBmp, ae.Points[0], Color.OrangeRed);
            ae.DrawPoint(sonoBmp, ae.Points[1], Color.Yellow);
            ae.DrawPoint(sonoBmp, ae.Points[2], Color.LimeGreen);
        }

        // draw the original hits on the standard sonogram
        foreach (int[] array in newList)
        {
            sonoBmp.SetPixel(array[0], height - array[1], Color.Cyan);
        }

        // mark off every tenth frequency bin on the standard sonogram
        for (int r = 0; r < 20; r++)
        {
            sonoBmp.SetPixel(0, height - (r * 10) - 1, Color.Blue);
            sonoBmp.SetPixel(1, height - (r * 10) - 1, Color.Blue);
        }

        // mark off upper bound and lower frequency bound
        sonoBmp.SetPixel(0, height - dominantBinMin, Color.Lime);
        sonoBmp.SetPixel(0, height - dominantBinMax, Color.Lime);
        sonoBmp.Save(filePath2);
    }

    // END DEBUG ################################ TEMPORARY ################################

    return (new LimnodynastesConvexResults
    {
        Sonogram = sonogram,
        Hits = null,
        Plot = plot,
        Events = acousticEvents,
        RecordingDuration = recording.Duration,
    });
} // Analysis()
/// <summary>
/// Generates a mel-scale decibel spectrogram (with PCA-whitening noise reduction) for every
/// non-empty top-level .wav file in the recording directory, saving an annotated image (.bmp)
/// and the spectrogram matrix (.csv) for each.
/// </summary>
/// <exception cref="ArgumentException">Thrown when the recording directory tree contains no files at all.</exception>
public static void GenerateSpectrograms()
{
    var recordingDir = @"M:\Liz\SupervisedPatchSamplingSet\Recordings\";
    var resultDir = @"M:\Liz\SupervisedPatchSamplingSet\";

    // check whether there is any file in the folder/subfolders
    // NOTE(review): this checks the whole tree, but the processing loop below only reads
    // top-level *.wav files - confirm the mismatch is intended.
    if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0)
    {
        throw new ArgumentException("The folder of recordings is empty...");
    }

    int frameSize = 1024;
    int finalBinCount = 256;
    FreqScaleType scaleType = FreqScaleType.Mel;
    bool doMelScale = scaleType == FreqScaleType.Mel; // idiom fix: replaces "? true : false"

    var settings = new SpectrogramSettings()
    {
        WindowSize = frameSize,

        // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
        // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
        // The "WindowOverlap" is calculated to answer this question
        // each 24 single-frames duration is equal to 1 second
        // note that the "WindowOverlap" value should be recalculated if frame size is changed
        // this has not yet been considered in the Config file!
        WindowOverlap = 0.10725204,
        DoMelScale = doMelScale,
        MelBinCount = doMelScale ? finalBinCount : frameSize / 2,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav"))
    {
        FileInfo fileInfo = filePath.ToFileInfo();

        // process the wav file if it is not empty
        if (fileInfo.Length != 0)
        {
            var recording = new AudioRecording(filePath);
            settings.SourceFileName = recording.BaseName;

            var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader);
            var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);

            // DO NOISE REDUCTION
            decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);

            // draw the spectrogram
            var attributes = new SpectrogramAttributes()
            {
                NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency,
                Duration = decibelSpectrogram.Attributes.Duration,
            };
            Image image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes);
            string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp");
            image.Save(pathToSpectrogramFiles);

            // write the matrix to a csv file
            string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv");
            Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), decibelSpectrogram.Data);
        }
    }
}
/// <summary>
/// The CORE ANALYSIS METHOD.
/// Builds a standard spectrogram of the source segment, detects harmonic stacks (formant-like
/// structure) via cross-correlation, filters them by intensity / formant-gap / dominant-frequency
/// rules, and converts the surviving score positions into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment to analyse.</param>
/// <param name="configDict">Config values: FRAME_LENGTH (optional), MIN_HZ, MIN_FORMANT_GAP,
/// MAX_FORMANT_GAP, DECIBEL_THRESHOLD, INTENSITY_THRESHOLD, CALL_DURATION.</param>
/// <param name="segmentStartOffset">Start offset of this segment within the source recording.</param>
/// <returns>Tuple of (sonogram, hits matrix, score plot, predicted events, recording duration).</returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values -
    int frameLength = 1024;
    if (configDict.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"]);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"]);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"]);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"]); //dB
    double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    }; //default values config
    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // with zero overlap, frames/second numerically equals the bin width (sr / windowSize)
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // restrict the search to the configured frequency band
    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);
    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    //#############################################################################################################################################
    //ii: DETECT HARMONICS
    var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);
    double[] dBArray = results.Item1;
    double[] intensity = results.Item2; //an array of periodicity scores
    double[] periodicity = results.Item3;

    //intensity = DataTools.filterMovingAverage(intensity, 3);
    int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < harmonicIntensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        //expect humans to have max < 1000 Hz
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < noiseBound; j++)
        {
            spectrum[j] = 0.0;
        }

        int maxIndex = DataTools.GetMaxIndex(spectrum);
        int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
        double discount = 1.0;
        if (freqWithMaxPower < 1200)
        {
            // zero the score entirely when the dominant energy is below 1200 Hz
            discount = 0.0;
        }

        // NOTE(review): this condition is always true here - the first guard in the
        // loop already rejected intensity[r] < threshold.
        if (intensity[r] > harmonicIntensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        for (int c = minBin; c < maxbin; c++)
        {
            //hits[r, c] = herzPeriod / (double)380; //divide by 380 to get a relativePeriod;
            hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxPossibleScore = 0.5;
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List<AcousticEvent>();
    for (int i = 0; i < rowCount; i++)
    {
        //assume one score position per crow call
        if (scoreArray[i] < 0.001)
        {
            continue;
        }

        // centre the event on the score position (half a call span earlier)
        double startTime = (i - halfCallSpan) / framesPerSecond;
        AcousticEvent ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        ev.Score = scoreArray[i];
        ev.ScoreNormalised = ev.Score / maxPossibleScore; // normalised to the user supplied threshold
        //ev.Score_MaxPossible = maxPossibleScore;
        predictedEvents.Add(ev);
    } //for loop

    Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);
    return (Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
} //Analysis()
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Reads the species-specific detection parameters from the config, builds a noise-reduced
/// spectrogram, runs the 2012 oscillation detector, and returns the resulting events/plots.
/// </summary>
/// <param name="recording">The audio segment; MUST be sampled at 22050 Hz.</param>
/// <param name="configuration">Recognizer configuration (DCT, oscillation and event parameters).</param>
/// <param name="segmentStartOffset">Start offset of this segment within the source recording.</param>
/// <param name="getSpectralIndexes">Lazily calculated spectral indices (not used in this method).</param>
/// <param name="outputDirectory">Directory for debug output images.</param>
/// <param name="imageWidth">Requested image width (not used in this method).</param>
/// <returns>The <see cref="RecognizerResults"/> containing sonogram, hits, plots and events.</returns>
/// <exception cref="InvalidOperationException">Thrown when the recording is not 22050 Hz.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // BETTER TO CALCULATE THIS. IGNORE USER!
    // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

    // duration of DCT in seconds
    double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration);

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // The default was 512 for Canetoad.
    // Set longer Framesize for calls having longer pulse periodicity.
    const int FrameSize = 128;

    // overlap is derived so the frame rate can resolve the highest oscillation frequency
    double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
        recording.SampleRate,
        FrameSize,
        maxOscilFreq);

    //windowOverlap = 0.75; // previous default

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = FrameSize,
        WindowOverlap = windowOverlap,
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.1,
    };

    // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD");
    TimeSpan recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // NOTE(review): rowCount/colCount are not used below (leftover from the commented-out submatrix).
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, (rowCount - 1), maxbin);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    // This window is used to smooth the score array before extracting events.
    // A short window (e.g. 3) preserves sharper score edges to define events but also keeps noise.
    int scoreSmoothingWindow = 13;
    Oscillations2012.Execute(
        (SpectrogramStandard)sonogram,
        minHz,
        maxHz,
        dctDuration,
        minOscilFreq,
        maxOscilFreq,
        dctThreshold,
        eventThreshold,
        minDuration,
        maxDuration,
        scoreSmoothingWindow,
        out var scores,
        out var oscillationEvents,
        out var hits,
        segmentStartOffset);

    var acousticEvents = oscillationEvents.ConvertSpectralEventsToAcousticEvents();

    // stamp each event with common metadata
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, scores, eventThreshold);
    var plots = new List<Plot> { plot };

    // debug image is only written when MainEntry.InDEBUG (see WriteDebugImage)
    this.WriteDebugImage(recording, outputDirectory, sonogram, acousticEvents, plots, hits);

    return (new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plots,
        Events = acousticEvents,
    });
}
/// <summary>
/// Calculates the six spectral indices (OSC, ACI, ENT, BGN, PMN, EVN) used for acoustic
/// content description from one recording segment.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="segmentOffsetTimeSpan">Offset of this segment from the start of the source recording.</param>
/// <param name="sampleRateOfOriginalAudioFile">Sample rate of the original audio file; used to cap the nyquist when the segment has been up-sampled.</param>
/// <param name="returnSonogramInfo">Unused; retained for backwards compatibility with the superseded IndexCalculateResult version of this method.</param>
/// <returns>The populated spectral index values.</returns>
public static SpectralIndexValuesForContentDescription Analysis(
    AudioRecording recording,
    TimeSpan segmentOffsetTimeSpan,
    int sampleRateOfOriginalAudioFile,
    bool returnSonogramInfo = false)
{
    // returnSonogramInfo = true; // if debugging
    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    // Get FRAME parameters for the calculation of Acoustic Indices
    int frameSize = ContentSignatures.FrameSize;
    int frameStep = frameSize; // that is, windowOverlap = zero

    double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    // Sets default values, including the low/mid frequency band boundaries used below.
    var config = new IndexCalculateConfig();

    // INITIALISE a RESULTS STRUCTURE TO return.
    // NOTE(review): the previous version also computed indexCalculationDuration, freqBinCount and
    // GetIndexProperties() here, but all were used only by commented-out legacy code and have been removed.
    var spectralIndices = new SpectralIndexValuesForContentDescription();

    // ################################## FINISHED SET-UP
    // ################################## NOW GET THE AMPLITUDE SPECTROGRAM

    // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
    // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
    var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

    // (B) ############## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##############
    // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

    // double length of the vector because want to work with 256 element vector for spectrogram purposes
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // (C) ############## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##############
    // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
    // original sample rate can be anything 11.0-44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;

    // if up-sampling has been done
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
    }

    // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

    // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values,
    //      i.e. calculate 1-Ht for energy concentration.
    double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int i = 0; i < temporalEntropySpectrum.Length; i++)
    {
        temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
    }

    spectralIndices.ENT = temporalEntropySpectrum;

    // (C) ############## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##############

    // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    spectralIndices.BGN = spectralDecibelBgn;

    // ii: Calculate the noise reduced decibel spectrogram derived from segment recording.
    // NOTE(review): this recomputes the same matrix as above — presumably to guard against
    // mutation during the background-noise calculation; confirm before collapsing the two calls.
    decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

    // ################################################################################################
    // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
    // FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth

    // dB THRESHOLD for calculating spectral coverage
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

    // Calculate lower and upper boundary bin ids.
    // Boundary between low & mid frequency bands is to avoid low freq bins containing anthropogenic
    // noise. These biased index values away from bio-phony.
    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
    var spActivity = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

    //spectralIndices.CVR = spActivity.CoverSpectrum;
    spectralIndices.EVN = spActivity.EventSpectrum;

    return spectralIndices;
} // end calculation of Six Spectral Indices
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Scores band-limited energy in each frame and converts the score array into acoustic events.
/// </summary>
/// <param name="recording">One audio segment to analyse.</param>
/// <param name="configuration">The recogniser configuration, read into a LitoriaNasutaConfig.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Lazily calculated spectral indices (not used by this recogniser).</param>
/// <param name="outputDirectory">Directory in which debug images are written.</param>
/// <param name="imageWidth">Requested image width (not used by this recogniser).</param>
/// <returns>The detected acoustic events, score plot and sonogram.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaNasutaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // BETTER TO SET THESE. IGNORE USER!
    // this default framesize seems to work
    const int frameSize = 1024;
    const double windowOverlap = 0.0;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // use the default HAMMING window
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.None
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // Convert the configured band limits (Hz) to spectrogram bin indices.
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 3.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // Per-frame average energy within the target band serves as the score array.
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    //double[] topBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, maxBin + 3, (rowCount - 1), maxBin + 9);
    //double[] botBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin - 3, (rowCount - 1), minBin - 9);

    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    double[,] hits = null;

    // Stamp every detected event with segment and species metadata.
    // NOTE(review): the previous version declared an unused prunedEvents list here; removed.
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = recognizerConfig.SpeciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
    });

    // Zero out sub-threshold scores for a cleaner display plot.
    var thresholdedPlot = new double[amplitudeArray.Length];
    for (int x = 0; x < amplitudeArray.Length; x++)
    {
        if (amplitudeArray[x] > decibelThreshold)
        {
            thresholdedPlot[x] = amplitudeArray[x];
        }
    }

    var maxDb = amplitudeArray.MaxOrDefault();

    double[] normalisedScores;
    double normalisedThreshold;
    DataTools.Normalise(thresholdedPlot, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var text = $"{this.DisplayName} (Fullscale={maxDb:f1}dB)";
    var plot = new Plot(text, normalisedScores, normalisedThreshold);

    // TODO(review): debug output is unconditionally enabled; consider gating on a debug flag before deployment.
    if (true)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { plot, amplPlot };

        // NOTE: This DrawDebugImage() method can be over-written in this class.
        var debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, hits);
        var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
        debugImage.Save(debugPath);
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = acousticEvents,
    };
}
/// <summary>
/// Renders an octave-frequency-scale spectrogram from a marine recording, then compares the
/// scale's bin bounds and grid-line locations against stored EXPECTED json files and checks
/// the rendered image dimensions.
/// </summary>
public void OctaveFrequencyScale2()
{
    var wavFile = PathHelper.ResolveAsset(@"Recordings\MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit-60sec.wav");
    var fileStem = "JascoMarineGBR1";
    var resultsDir = this.outputDirectory;
    var imagePath = Path.Combine(this.outputDirectory.FullName, "Octave2ScaleSonogram.png");

    var scaleType = FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
    var freqScale = new FrequencyScale(scaleType);
    var recording = new AudioRecording(wavFile);

    var spectrogramConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // Build the amplitude spectrogram and convert it to a decibel octave scale.
    var sonogram = new AmplitudeSonogram(spectrogramConfig, recording.WavReader);
    sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // DO NOISE REDUCTION
    sonogram.Data = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + scaleType.ToString(), freqScale.GridLineLocations);
    image.Save(imagePath, ImageFormat.Png);

    // Warn (the subsequent file comparison will then fail) when an EXPECTED results asset is absent.
    void WarnIfMissing(FileInfo expected)
    {
        if (!expected.Exists)
        {
            LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
            LoggedConsole.WriteErrorLine(
                $"If ACTUAL results file is correct, move it to dir `{PathHelper.TestResources}` and change its suffix to <.EXPECTED.json>");
        }
    }

    // DO FILE EQUALITY TESTS
    // Check that freqScale.OctaveBinBounds are correct
    var expectedBinBounds = PathHelper.ResolveAsset("FrequencyScale\\" + fileStem + "_Octave2ScaleBinBounds.EXPECTED.json");
    WarnIfMissing(expectedBinBounds);
    var actualBinBounds = new FileInfo(Path.Combine(resultsDir.FullName, fileStem + "_Octave2ScaleBinBounds.ACTUAL.json"));
    Json.Serialise(actualBinBounds, freqScale.BinBounds);
    FileEqualityHelpers.TextFileEqual(expectedBinBounds, actualBinBounds);

    // Check that freqScale.GridLineLocations are correct
    var expectedGridLines = PathHelper.ResolveAsset("FrequencyScale\\" + fileStem + "_Octave2ScaleGridLineLocations.EXPECTED.json");
    WarnIfMissing(expectedGridLines);
    var actualGridLines = new FileInfo(Path.Combine(resultsDir.FullName, fileStem + "_Octave2ScaleGridLineLocations.ACTUAL.json"));
    Json.Serialise(actualGridLines, freqScale.GridLineLocations);
    FileEqualityHelpers.TextFileEqual(expectedGridLines, actualGridLines);

    // Check that image dimensions are correct
    Assert.AreEqual(201, image.Width);
    Assert.AreEqual(310, image.Height);
}