/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Runs the Lewin's Rail recognizer once per profile declared in the config file and merges
/// the surviving events and score plots into a single RecognizerResults.
/// </summary>
/// <param name="recording">The audio segment to analyse. Must be sampled at 22050 Hz.</param>
/// <param name="configuration">The config object; must contain a profiles section.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Lazily-computed spectral indices; not read by this method.</param>
/// <param name="outputDirectory">Directory into which debug images may be written.</param>
/// <param name="imageWidth">Required; a null value throws.</param>
public override RecognizerResults Recognize(
    AudioRecording recording,
    Config configuration,
    TimeSpan segmentStartOffset,
    Lazy<IndexCalculateResult[]> getSpectralIndexes,
    DirectoryInfo outputDirectory,
    int? imageWidth)
{
    //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // this default framesize seems to work for Lewin's Rail
    const int frameSize = 512;

    // DO NOT SET windowOverlap here. It is calculated below from the oscillation rate.

    if (imageWidth == null)
    {
        throw new ArgumentNullException(nameof(imageWidth));
    }

    // check the sample rate. Must be 22050
    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    TimeSpan recordingDuration = recording.WavReader.Time;

    // check for the profiles in the config file
    bool hasProfiles = ConfigFile.HasProfiles(configuration);
    if (!hasProfiles)
    {
        throw new ConfigFileException("The Config file for L.pectoralis must contain a profiles object.");
    }

    // get the profile names
    string[] profileNames = ConfigFile.GetProfileNames(configuration);
    var recognizerConfig = new LewinsRailConfig();
    var prunedEvents = new List<AcousticEvent>();
    var plots = new List<Plot>();
    BaseSonogram sonogram = null;

    // cycle through the profiles and analyse recording using each of them.
    // NOTE: the same recognizerConfig instance is re-used and mutated for each profile.
    foreach (var name in profileNames)
    {
        Log.Debug($"Reading profile <{name}>.");
        recognizerConfig.ReadConfigFile(configuration, name);

        // ignore oscillations above this threshold freq
        int maxOscilRate = (int)Math.Ceiling(1 / recognizerConfig.MinPeriod);

        // calculate frame overlap and ignore any user input
        double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
            recording.SampleRate,
            frameSize,
            maxOscilRate);

        // i: MAKE SONOGRAM
        var sonoConfig = new SonogramConfig
        {
            SourceFName = recording.BaseName,

            //set default values - ignore those set by user
            WindowSize = frameSize,
            WindowOverlap = windowOverlap,

            // the default window is HAMMING
            //WindowFunction = WindowFunctions.HANNING.ToString(),
            //WindowFunction = WindowFunctions.NONE.ToString(),

            // if do not use noise reduction can get a more sensitive recogniser.
            //NoiseReductionType = NoiseReductionType.NONE,
            NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
        };

        //#########################################################################
        //DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
        var results = Analysis(recording, sonoConfig, recognizerConfig, this.ReturnDebugImage, segmentStartOffset);

        //#########################################################################

        // NOTE(review): a null result aborts the whole segment, discarding any events
        // already gathered from earlier profiles — confirm this is intended.
        if (results == null)
        {
            return (null); //nothing to process
        }

        sonogram = results.Item1;
        //var hits = results.Item2;
        var scoreArray = results.Item3;
        var predictedEvents = results.Item4;
        var debugImage = results.Item5;

        //#########################################################################

        if (debugImage == null)
        {
            Log.Debug("DebugImage is null, not writing file");
        }
        else if (MainEntry.InDEBUG)
        {
            // NOTE(review): the Save call below is commented out, so this branch currently
            // computes a path but writes nothing — confirm whether saving was intended.
            var imageName = AnalysisResultName(recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
            var debugPath = outputDirectory.Combine(imageName);
            //debugImage.Save(debugPath.FullName);
        }

        // keep only events whose score exceeds the profile's event threshold,
        // and stamp them with species/segment metadata
        foreach (var ae in predictedEvents)
        {
            // add additional info
            if (!(ae.Score > recognizerConfig.EventThreshold))
            {
                continue;
            }

            ae.Name = recognizerConfig.AbbreviatedSpeciesName;
            ae.SpeciesName = recognizerConfig.SpeciesName;
            ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;

            // NOTE(review): this records the duration of the whole segment on every
            // event, not the event's own duration — confirm that is the contract.
            ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
            prunedEvents.Add(ae);
        }

        // increase very low scores (x4, clipped to 1.0) so they are visible in the plot
        for (int j = 0; j < scoreArray.Length; j++)
        {
            scoreArray[j] *= 4;
            if (scoreArray[j] > 1.0)
            {
                scoreArray[j] = 1.0;
            }
        }

        var plot = new Plot(this.DisplayName, scoreArray, recognizerConfig.EventThreshold);
        plots.Add(plot);
    }

    return (new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = null,
        Plots = plots,
        Events = prunedEvents,
    });
}
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Cross-correlates the frame-averaged energy of a lower and an upper frequency band,
/// scores frames by the periodicity of that correlation, converts the score array to
/// acoustic events, and optionally draws a debug spectrogram image.
/// </summary>
/// <param name="recording">The audio segment to analyse; when null the method logs and returns null.</param>
/// <param name="sonoConfig">Configuration used to build the spectrogram.</param>
/// <param name="lrConfig">Recognizer parameters: band limits, thresholds, durations and periods.</param>
/// <param name="returnDebugImage">When true, a debug image of scores over the sonogram is produced.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>
/// Tuple of (sonogram, hits matrix, intensity scores, predicted events, debug image or null),
/// or null when no recording was supplied.
/// </returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LewinsRailConfig lrConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    // unpack the recognizer parameters
    int sampleRate = recording.SampleRate;
    int upperBandMinHz = lrConfig.UpperBandMinHz;
    int upperBandMaxHz = lrConfig.UpperBandMaxHz;
    int lowerBandMinHz = lrConfig.LowerBandMinHz;
    int lowerBandMaxHz = lrConfig.LowerBandMaxHz;
    double eventThreshold = lrConfig.EventThreshold; // in 0-1
    double minDuration = lrConfig.MinDuration;       // seconds
    double maxDuration = lrConfig.MaxDuration;       // seconds
    double minPeriod = lrConfig.MinPeriod;           // seconds
    double maxPeriod = lrConfig.MaxPeriod;           // seconds

    double hertzPerBin = sampleRate / (double)sonoConfig.WindowSize;

    // NOTE(review): frames-per-second is taken to equal the frequency bin width here,
    // which only holds for a particular window/overlap relationship — confirm.
    double framesPerSecond = hertzPerBin;

    // convert a frequency in Hertz to a spectrogram bin index.
    // The cross-correlation technique requires the number of bins scanned to be a power of 2.
    int HzToBin(int hertz) => (int)Math.Round(hertz / hertzPerBin) + 1;

    int upperBandMinBin = HzToBin(upperBandMinHz);
    int upperBandMaxBin = HzToBin(upperBandMaxHz);
    int lowerBandMinBin = HzToBin(lowerBandMinHz);
    int lowerBandMaxBin = HzToBin(lowerBandMaxHz);

    // i: MAKE SONOGRAM
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int frameCount = sonogram.Data.GetLength(0);
    int binCount = sonogram.Data.GetLength(1);

    // average energy per frame within each of the two bands
    double[] lowerBandEnergy = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, frameCount - 1, lowerBandMaxBin);
    double[] upperBandEnergy = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, frameCount - 1, upperBandMaxBin);

    int stepSize = (int)Math.Round(framesPerSecond); // take one-second steps
    int stepCount = frameCount / stepSize;
    int sampleLength = 64; // frames per correlation window; suitable for Lewin's Rail

    double[] intensity = new double[frameCount];
    double[] periodicity = new double[frameCount];

    //######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int stepIndex = 0; stepIndex < stepCount; stepIndex++)
    {
        int startFrame = stepSize * stepIndex;
        double[] lowerWindow = DataTools.Subarray(lowerBandEnergy, startFrame, sampleLength);
        double[] upperWindow = DataTools.Subarray(upperBandEnergy, startFrame, sampleLength);

        // a short window means we have run off the end of the arrays
        if (lowerWindow.Length != sampleLength || upperWindow.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerWindow, upperWindow);

        // in real data the lowest bins are dominant and hide other frequency content
        const int zeroCount = 3;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);

        // convert the peak bin index to a period in seconds
        double period = 2 * sampleLength / (double)peakBin / framesPerSecond;
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down the peak score (keeping any larger existing value) and
        // the detected period over the whole correlation window
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[startFrame + j] < spectrum[peakBin])
            {
                intensity[startFrame + j] = spectrum[peakBin];
            }

            periodicity[startFrame + j] = period;
        }
    }

    //######################################################################
    // iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerBandMinHz,
        upperBandMaxHz,
        sonogram.FramesPerSecond,
        hertzPerBin,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperBandEnergy, segmentStartOffset);
    var hits = new double[frameCount, binCount];

    //######################################################################

    var scorePlot = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var intensityPlot = new Plot("Intensity", normalisedScores, normalisedThreshold);
        DataTools.Normalise(periodicity, 10, out normalisedScores, out normalisedThreshold);
        var periodicityPlot = new Plot("Periodicity", normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { scorePlot, intensityPlot, periodicityPlot };
        debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
    }

    return Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage);
} //Analysis()