/// <summary>
/// New and alternative version of the L.convex recogniser, written after discovering
/// that the call is more variable than first realised.
/// Algorithm: build a standard decibel spectrogram (with noise removal), slide a
/// <c>callFrameWidth</c>-frame window down the recording maintaining a running column-sum
/// spectrum, and match that spectrum against three fixed templates (differing in the
/// spacing of the call's three harmonic peaks). Smoothed score peaks that exceed both the
/// similarity and decibel thresholds become acoustic events.
/// </summary>
/// <param name="audioRecording">The audio segment to scan.</param>
/// <param name="configuration">Recognizer configuration; supplies thresholds, top frequency, species names.</param>
/// <param name="outputDirectory">Directory for the debug spectrogram image (only used when debug display is enabled).</param>
/// <param name="segmentStartOffset">Offset of this segment within the source recording.</param>
/// <returns>A <see cref="RecognizerResults"/> containing the detected events, hits matrix, score plots and sonogram.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the template bandwidth plus search band exceeds the available bandwidth below the configured top frequency.
/// </exception>
internal RecognizerResults Gruntwork2(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    // make a spectrogram
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
    int frameStep = 512;
    int sampleRate = audioRecording.SampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    var config = new SonogramConfig
    {
        WindowSize = frameStep,

        // this default = zero overlap
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    // var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    // sonogram.Data = spg;
    var spg = sonogram.Data;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    double herzPerBin = sampleRate / 2.0 / colCount;
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## TWO THRESHOLDS
    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = configuration.GetDoubleOrNull("PeakThresholdDecibels") ?? 3.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.5;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    int callFrameWidth = 5;
    int callHalfWidth = callFrameWidth / 2;

    // minimum number of bins covering frequency bandwidth of L.convex call
    // call has binWidth=25 but we want zero buffer of four bins either side.
    int callBinWidth = 25;
    int binSilenceBuffer = 4;
    int topFrequency = configuration.GetInt("TopFrequency");

    // # The Limnodynastes call has a duration of 3-5 frames given the above settings.
    // # But we will assume 5-7 because sometimes the three harmonics are not exactly aligned!!
    // # The call has three major peaks. The top peak, typically the dominant peak, is at approx 1850, a value which is set in the config.
    // # The second and third peak are at equal gaps below. TopFreq-gap and TopFreq-(2*gap);
    // # The gap could be set in the Config file, but this is not implemented yet.
    // Instead the algorithm uses three pre-fixed templates that determine the different kinds of gap. Gap is typically close to 500Hz
    // In the D.Stewart CD, there are peaks close to:
    //1. 1950 Hz
    //2. 1460 hz
    //3. 970 hz These are 490 Hz apart.
    // In the Kiyomi's JCU recording, there are peaks close to:
    //1. 1780 Hz
    //2. 1330 hz
    //3. 880 hz These are 450 Hz apart.
    // So the strategy is to look for three peaks separated by same amount and in the vicinity of the above,
    // To this end we produce three templates each of length 36, but having 2nd and 3rd peaks at different intervals.
    var templates = GetLconvexTemplates(callBinWidth, binSilenceBuffer);
    int templateHeight = templates[0].Length;

    // NOTE: could give user control over other call features
    //       Such as frequency gap between peaks. But not in this first iteration of the recognizer.
    //int peakGapInHerz = (int)configuration["PeakGap"];
    int searchBand = 8;
    int topBin = (int)Math.Round(topFrequency / herzPerBin);
    int bottomBin = topBin - templateHeight - searchBand + 1;
    if (bottomBin < 0)
    {
        Log.Fatal("Template bandwidth exceeds available bandwidth given your value for top frequency.");

        // BUG FIX: log4net's Fatal() only logs — it does not halt execution. Previously the
        // method carried on with a negative bottomBin, which produced an obscure
        // index-out-of-range failure inside MatrixTools.Submatrix. Fail fast instead.
        throw new InvalidOperationException(
            "Template bandwidth exceeds available bandwidth given your value for top frequency.");
    }

    // restrict the spectrogram to the search band below (and including) the top frequency bin
    spg = MatrixTools.Submatrix(spg, 0, bottomBin, sonogram.Data.GetLength(0) - 1, topBin);

    // seed the sliding window: column sums over the first callFrameWidth frames
    double[,] frames = MatrixTools.Submatrix(spg, 0, 0, callFrameWidth - 1, spg.GetLength(1) - 1);
    double[] spectrum = MatrixTools.GetColumnSums(frames);

    // set up arrays for monitoring important event parameters
    double[] decibels = new double[rowCount];
    int[] bottomBins = new int[rowCount];
    double[] scores = new double[rowCount]; // predefinition of score array
    int[] templateIds = new int[rowCount];
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = callFrameWidth; s < rowCount; s++)
    {
        double[] rowToRemove = MatrixTools.GetRow(spg, s - callFrameWidth);
        double[] rowToAdd = MatrixTools.GetRow(spg, s);

        // shift frame block to the right: running sum — drop the oldest frame, add the newest.
        for (int b = 0; b < spectrum.Length; b++)
        {
            spectrum[b] = spectrum[b] - rowToRemove[b] + rowToAdd[b];
        }

        // now check if frame block matches a template.
        ScanEventScores(spectrum, templates, out double eventScore, out int eventBottomBin, out int templateId);

        //hits[rowCount, colCount];
        // results are centred on the window: write at (s - callHalfWidth - 1).
        decibels[s - callHalfWidth - 1] = spectrum.Max() / callFrameWidth;
        bottomBins[s - callHalfWidth - 1] = eventBottomBin + bottomBin;
        scores[s - callHalfWidth - 1] = eventScore;
        templateIds[s - callHalfWidth - 1] = templateId;
    } // loop through all spectra

    // we now have a score array and decibel array and bottom bin array for the entire spectrogram.
    // smooth them to find events
    scores = DataTools.filterMovingAverageOdd(scores, 5);
    decibels = DataTools.filterMovingAverageOdd(decibels, 3);
    var peaks = DataTools.GetPeaks(scores);

    // loop through the score array and find potential events
    var potentialEvents = new List<AcousticEvent>();
    for (int s = callHalfWidth; s < scores.Length - callHalfWidth - 1; s++)
    {
        // an event requires a score peak that clears both thresholds
        if (!peaks[s])
        {
            continue;
        }

        if (scores[s] < similarityThreshold)
        {
            continue;
        }

        if (decibels[s] < eventThresholdDb)
        {
            continue;
        }

        // put hits into hits matrix
        // put cosine score into the score array
        //for (int s = point.X; s <= point.Y; s++)
        //{
        //    hits[s, topBins[s]] = 10;
        //}

        // convert bin/frame coordinates back to Hz and seconds
        int bottomBinForEvent = bottomBins[s];
        int topBinForEvent = bottomBinForEvent + templateHeight;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);
        double startTime = (s - callHalfWidth) * frameStepInSeconds;
        double durationTime = callFrameWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            //Name = string.Empty, // remove name because it hides spectral content of the event.
            Name = "Lc" + templateIds[s],
            Score = scores[s],
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var scorePlot = new Plot(this.DisplayName + " scores", scores, similarityThreshold);
    DataTools.Normalise(decibels, eventThresholdDb, out double[] normalisedDb, out double normalisedThreshold);
    var decibelPlot = new Plot("Decibels", normalisedDb, normalisedThreshold);
    var debugPlots = new List<Plot> { scorePlot, decibelPlot };
    if (this.displayDebugImage)
    {
        var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // display the cosine similarity scores
    var plot = new Plot(this.DisplayName, scores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // add names into the returned events
    // NOTE(review): this overwrites the template-id name ("Lc" + id) set above — presumably
    // intentional so returned events carry the species name, but confirm.
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;
    foreach (var ae in potentialEvents)
    {
        ae.Name = abbreviatedSpeciesName;
        ae.SpeciesName = speciesName;
    }

    return new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}