/// <summary>
/// Writes two debug spectrogram images for this recognizer: one built from the supplied
/// sonogram (with events, plots and hits overlaid) and a second built from a fresh
/// spectrogram computed with a longer analysis frame (window = 1024).
/// Only runs when MainEntry.InDEBUG is true.
/// </summary>
/// <param name="recording">The source recording; its BaseName is used in the output file names.</param>
/// <param name="outputDirectory">Directory into which the PNG files are written.</param>
/// <param name="sonogram">The spectrogram used for the first debug image.</param>
/// <param name="acousticEvents">Events to overlay on both images.</param>
/// <param name="plots">Score plots to draw under both images.</param>
/// <param name="hits">Hit matrix overlaid on the first image only (second image passes null).</param>
private void WriteDebugImage(AudioRecording recording, DirectoryInfo outputDirectory, BaseSonogram sonogram, List<AcousticEvent> acousticEvents, List<Plot> plots, double[,] hits)
{
    // DEBUG IMAGE this recogniser only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (!displayDebugImage)
    {
        return;
    }

    // FIX: dispose each image after saving — the images were previously leaked.
    // NOTE(review): assumes the Image type here is IDisposable (true for both
    // System.Drawing and ImageSharp) — confirm against the project's Image alias.
    using (Image debugImage1 = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, plots, hits))
    {
        var debugPath1 = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram1"));
        debugImage1.Save(debugPath1.FullName);
    }

    // Save a second image computed with a longer frame (1024 vs the analysis default).
    var sonoConfig2 = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 1024,
        WindowOverlap = 0,

        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.1,
    };
    BaseSonogram sonogram2 = new SpectrogramStandard(sonoConfig2, recording.WavReader);
    var debugPath2 = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram2"));

    // Second image carries no hit overlay (hits: null).
    using (Image debugImage2 = SpectrogramTools.GetSonogramPlusCharts(sonogram2, acousticEvents, plots, null))
    {
        debugImage2.Save(debugPath2.FullName);
    }
}
/// <summary>
/// Builds the full path of an analysis result file inside the given directory.
/// When no format is supplied there is no file component, so the bare directory
/// path is returned unchanged.
/// </summary>
/// <param name="directory">Directory that will contain the result file.</param>
/// <param name="baseName">Base name of the source recording.</param>
/// <param name="format">File format/extension; must be a member of AllFormats when non-empty.</param>
/// <param name="tag">Analysis tag embedded in the result file name.</param>
/// <returns>The combined path, or <paramref name="directory"/> when format is null or empty.</returns>
public static string MakePath(string directory, string baseName, string format, string tag)
{
    if (string.IsNullOrEmpty(format))
    {
        return directory;
    }

    // Only recognised formats may be used to build a file name.
    Contract.Requires(AllFormats.Contains(format));

    var resultFileName = FilenameHelpers.AnalysisResultName(baseName, tag, format);
    return Path.Combine(directory, resultFileName);
}
/// <summary>
/// Derives the base file name for a tile from its time offset (X coordinate) and zoom index.
/// The tile's Y coordinate is deliberately discarded.
/// </summary>
/// <param name="calculatedLayers">All calculated zoom layers.</param>
/// <param name="selectedLayer">The layer the tile belongs to.</param>
/// <param name="tileOffsets">Tile coordinates; only X (time) is used.</param>
/// <returns>A file base name composed of prefix, tag, ISO-8601 date and zoom index.</returns>
public override string GetFileBaseName(SortedSet<Layer> calculatedLayers, Layer selectedLayer, Point tileOffsets)
{
    // Only the X (time) coordinate matters; the Y coordinate is discarded here.
    var timeOffset = (TimeSpan)this.GetTileIndexes(calculatedLayers, selectedLayer, tileOffsets);
    var tileStamp = this.baseDateUtc.Add(timeOffset);

    var dateText = tileStamp.ToString(AppConfigHelper.Iso8601FileCompatibleDateFormatUtcWithFractionalSeconds);
    var zoomText = ((double)this.GetZoomIndex(calculatedLayers, selectedLayer)).ToString(CultureInfo.InvariantCulture);

    return FilenameHelpers.AnalysisResultName(this.prefix, this.tag, null, dateText, zoomText);
}
/// <summary>
/// Composes and saves a single debug spectrogram image: time track, optional named score
/// tracks, optional rainbow hit overlay, and optional acoustic-event rectangles.
/// Only runs when MainEntry.InDEBUG is true.
/// </summary>
/// <param name="recordingFileName">Source file name used to build the output file name.</param>
/// <param name="outputDirectory">Directory into which the PNG is written.</param>
/// <param name="sonogram">Spectrogram rendered as the image background.</param>
/// <param name="events">Events to draw; may be null or empty.</param>
/// <param name="scores">Score plots to draw; each plot's data is assumed normalised in [0, 1]. May be null.</param>
/// <param name="hits">Hit matrix for the rainbow transparency overlay; may be null.</param>
public void WriteDebugImage(string recordingFileName, DirectoryInfo outputDirectory, BaseSonogram sonogram, List<AcousticEvent> events, List<Plot> scores, double[,] hits)
{
    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (!displayDebugImage)
    {
        return;
    }

    bool doHighlightSubband = false;
    bool add1kHzLines = true;
    Image_MultiTrack image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines, doMelScale: false));
    image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));

    if (scores != null)
    {
        // assumes data normalised in 0,1
        foreach (Plot plot in scores)
        {
            image.AddTrack(ImageTrack.GetNamedScoreTrack(plot.data, 0.0, 1.0, plot.threshold, plot.title));
        }
    }

    if (hits != null)
    {
        image.OverlayRainbowTransparency(hits);
    }

    // FIX: null-guard added — scores and hits were null-checked above but events was not,
    // so a null events list previously threw NullReferenceException here.
    if (events != null && events.Count > 0)
    {
        // set colour for the events
        foreach (AcousticEvent ev in events)
        {
            ev.BorderColour = AcousticEvent.DefaultBorderColor;
            ev.ScoreColour = AcousticEvent.DefaultScoreColor;
        }

        image.AddEvents(events, sonogram.NyquistFrequency, sonogram.Configuration.FreqBinCount, sonogram.FramesPerSecond);
    }

    var debugImage = image.GetImage();
    var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recordingFileName), this.Identifier, "png", "DebugSpectrogram"));
    debugImage.Save(debugPath.FullName);
}
/// <summary>
/// Detects candidate C. tinnula calls in one audio segment.
/// Pipeline: build a noise-reduced decibel spectrogram; find the dominant frequency bin
/// per frame; high-pass filter the band-averaged amplitude; find start/end points of
/// candidate events; score each candidate by template similarity; return events, a hit
/// matrix and score plots wrapped in a RecognizerResults.
/// </summary>
/// <param name="audioRecording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer config supplying thresholds, band limits and species names.</param>
/// <param name="outputDirectory">Destination for the optional debug image.</param>
/// <param name="segmentStartOffset">Offset of this segment within the whole recording; stamped onto each event.</param>
internal RecognizerResults Gruntwork(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };
    config.WindowOverlap = 0.0;

    // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);

    int frameSize = config.WindowSize;
    int frameStep = frameSize; // this default = zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // reading in variables from the config file
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = 6;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 256.
    int minFrameWidth = 7;
    int maxFrameWidth = 14;
    double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
    double maxDuration = maxFrameWidth * frameStepInSeconds;

    // Calculate Max Amplitude: convert the Hz band limits to bin indices.
    int binMin = (int)Math.Round(minHz / sonogram.FBinWidth);
    int binMax = (int)Math.Round(maxHz / sonogram.FBinWidth);

    int[] dominantBins = new int[rowCount]; // predefinition of events max frequency
    double[] scores = new double[rowCount]; // predefinition of score array
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    // mark the hits in hitMatrix
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = double.MinValue;
        int maxId = 0;

        // loop through bandwidth of L.convex call and look for dominant frequency
        // (bins below 5 are skipped, presumably to avoid low-frequency noise — TODO confirm)
        for (int binID = 5; binID < binMax; binID++)
        {
            if (spectrum[binID] > maxAmplitude)
            {
                maxAmplitude = spectrum[binID];
                maxId = binID;
            }
        }

        // dominant bin must lie inside the configured band
        if (maxId < binMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        scores[s] = maxAmplitude;
        dominantBins[s] = maxId;
    } // loop through all spectra

    // Find average amplitude within the band, then remove the baseline (high-pass filter).
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, binMin, rowCount - 1, binMax);
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // We now have a list of potential hits for C. tinnula. This needs to be filtered.
    var startEnds = new List<Point>();
    Plot.FindStartsAndEndsOfScoreEvents(highPassFilteredSignal, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out startEnds);

    // loop through the score array and find beginning and end of potential events
    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin over the event's frames (point.X..point.Y inclusive)
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= binMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        // find average dominant bin for the event
        // NOTE(review): if no frame in the event had dominantBins[s] >= binMin, binCount is 0
        // and this divides by zero (NaN -> undefined int cast) — verify upstream guarantees.
        int avDominantBin = (int)Math.Round(binSum / (double)binCount);
        int avDominantFreq = (int)(Math.Round(binSum / (double)binCount) * sonogram.FBinWidth);

        // Get score for the event.
        // Use a simple template for the honk and calculate cosine similarity to the template.
        // Template has three dominant frequencies.
        // minimum number of bins covering frequency bandwidth of C. tinnula call
        int callBinWidth = 14;
        var templates = GetCtinnulaTemplates(callBinWidth);
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double eventScore = GetEventScore(eventMatrix, templates);

        // put hits into hits matrix
        // put cosine score into the score array
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        if (eventScore < similarityThreshold)
        {
            continue;
        }

        int topBinForEvent = avDominantBin + 2;
        int bottomBinForEvent = topBinForEvent - callBinWidth;

        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, minHz, maxHz);
        newEvent.DominantFreq = avDominantFreq;
        newEvent.Score = eventScore;
        newEvent.SetTimeAndFreqScales(framesPerSec, sonogram.FBinWidth);
        newEvent.Name = string.Empty; // remove name because it hides spectral content of the event.
        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, eventThresholdDb, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("Average amplitude", normalisedScores, normalisedThreshold);
        DataTools.Normalise(highPassFilteredSignal, eventThresholdDb, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { ampltdPlot, demeanedPlot };
        Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, null);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // display the cosine similarity scores
    var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // add names into the returned events
    // NOTE(review): this assigns the LITERAL string "speciesName", not the speciesName
    // variable (nor abbreviatedSpeciesName as the trailing comment suggests) — looks like
    // a bug; confirm intent before fixing.
    foreach (AcousticEvent ae in potentialEvents)
    {
        ae.Name = "speciesName"; // abbreviatedSpeciesName;
    }

    return (new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    });
}
/// <summary>
/// New and alternative version of the L. convex recogniser, written because the call is
/// more variable than first realised.
/// Pipeline: build a noise-reduced decibel spectrogram; slide a fixed-width frame block
/// over a sub-band below TopFrequency; score each block against three peak-spacing
/// templates; smooth the score/decibel arrays; emit an AcousticEvent at each qualifying
/// score peak.
/// </summary>
/// <param name="audioRecording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer config (thresholds, TopFrequency, species names).</param>
/// <param name="outputDirectory">Destination for the optional debug image.</param>
/// <param name="segmentStartOffset">Offset of this segment within the whole recording.</param>
internal RecognizerResults Gruntwork2(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    // make a spectrogram
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
    int frameStep = 512;
    int sampleRate = audioRecording.SampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    var config = new SonogramConfig
    {
        WindowSize = frameStep,

        // this default = zero overlap
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

    // NOTE: unlike the sibling Gruntwork methods, the DC column is NOT removed here
    // (the removal code was deliberately commented out).
    // var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    // sonogram.Data = spg;
    var spg = sonogram.Data;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    double herzPerBin = sampleRate / 2.0 / colCount;
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## TWO THRESHOLDS
    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = configuration.GetDoubleOrNull("PeakThresholdDecibels") ?? 3.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.5;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    int callFrameWidth = 5;
    int callHalfWidth = callFrameWidth / 2;

    // minimum number of bins covering frequency bandwidth of L.convex call
    // call has binWidth=25 but we want zero buffer of four bins either side.
    int callBinWidth = 25;
    int binSilenceBuffer = 4;
    int topFrequency = configuration.GetInt("TopFrequency");

    // # The Limnodynastes call has a duration of 3-5 frames given the above settings.
    // # But we will assume 5-7 because sometimes the three harmonics are not exactly aligned!!
    // # The call has three major peaks. The top peak, typically the dominant peak, is at approx 1850, a value set in the config.
    // # The second and third peak are at equal gaps below: TopFreq-gap and TopFreq-(2*gap).
    // # The gap could be set in the config file, but this is not implemented yet.
    // Instead the algorithm uses three pre-fixed templates that cover different gaps. Gap is typically close to 500Hz.
    // In the D.Stewart CD, there are peaks close to: 1950 Hz, 1460 Hz, 970 Hz (490 Hz apart).
    // In Kiyomi's JCU recording, there are peaks close to: 1780 Hz, 1330 Hz, 880 Hz (450 Hz apart).
    // So the strategy is to look for three peaks separated by the same amount in the vicinity of the above.
    // To this end we produce three templates, each of length 36, having 2nd and 3rd peaks at different intervals.
    var templates = GetLconvexTemplates(callBinWidth, binSilenceBuffer);
    int templateHeight = templates[0].Length;

    // NOTE: could give user control over other call features,
    // such as frequency gap between peaks. But not in this first iteration of the recognizer.
    //int peakGapInHerz = (int)configuration["PeakGap"];
    int searchBand = 8;
    int topBin = (int)Math.Round(topFrequency / herzPerBin);
    int bottomBin = topBin - templateHeight - searchBand + 1;
    if (bottomBin < 0)
    {
        // NOTE(review): this logs fatally but does NOT return; the Submatrix call below
        // would then receive a negative row index — confirm Log.Fatal terminates, else
        // this needs an early return.
        Log.Fatal("Template bandwidth exceeds availble bandwidth given your value for top frequency.");
    }

    // restrict the spectrogram to the search band below topFrequency
    spg = MatrixTools.Submatrix(spg, 0, bottomBin, sonogram.Data.GetLength(0) - 1, topBin);

    // initialise the sliding frame-block sum with the first callFrameWidth frames
    double[,] frames = MatrixTools.Submatrix(spg, 0, 0, callFrameWidth - 1, spg.GetLength(1) - 1);
    double[] spectrum = MatrixTools.GetColumnSums(frames);

    // set up arrays for monitoring important event parameters
    double[] decibels = new double[rowCount];
    int[] bottomBins = new int[rowCount];
    double[] scores = new double[rowCount]; // predefinition of score array
    int[] templateIds = new int[rowCount];
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = callFrameWidth; s < rowCount; s++)
    {
        double[] rowToRemove = MatrixTools.GetRow(spg, s - callFrameWidth);
        double[] rowToAdd = MatrixTools.GetRow(spg, s);

        // shift frame block to the right: running sum over the last callFrameWidth frames.
        for (int b = 0; b < spectrum.Length; b++)
        {
            spectrum[b] = spectrum[b] - rowToRemove[b] + rowToAdd[b];
        }

        // now check if frame block matches a template.
        ScanEventScores(spectrum, templates, out double eventScore, out int eventBottomBin, out int templateId);

        // record results at the centre of the block (s - callHalfWidth - 1)
        decibels[s - callHalfWidth - 1] = spectrum.Max() / callFrameWidth;
        bottomBins[s - callHalfWidth - 1] = eventBottomBin + bottomBin;
        scores[s - callHalfWidth - 1] = eventScore;
        templateIds[s - callHalfWidth - 1] = templateId;
    } // loop through all spectra

    // we now have a score array, decibel array and bottom-bin array for the entire spectrogram.
    // smooth them to find events
    scores = DataTools.filterMovingAverageOdd(scores, 5);
    decibels = DataTools.filterMovingAverageOdd(decibels, 3);
    var peaks = DataTools.GetPeaks(scores);

    // loop through the score array and find potential events
    var potentialEvents = new List<AcousticEvent>();
    for (int s = callHalfWidth; s < scores.Length - callHalfWidth - 1; s++)
    {
        // must be a local score peak
        if (!peaks[s])
        {
            continue;
        }

        // similarity must exceed threshold
        if (scores[s] < similarityThreshold)
        {
            continue;
        }

        // amplitude must exceed threshold
        if (decibels[s] < eventThresholdDb)
        {
            continue;
        }

        int bottomBinForEvent = bottomBins[s];
        int topBinForEvent = bottomBinForEvent + templateHeight;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);
        double startTime = (s - callHalfWidth) * frameStepInSeconds;
        double durationTime = callFrameWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            // name encodes which of the three templates matched best
            Name = "Lc" + templateIds[s],
            Score = scores[s],
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var scorePlot = new Plot(this.DisplayName + " scores", scores, similarityThreshold);
    DataTools.Normalise(decibels, eventThresholdDb, out double[] normalisedDb, out double normalisedThreshold);
    var decibelPlot = new Plot("Decibels", normalisedDb, normalisedThreshold);
    var debugPlots = new List<Plot> { scorePlot, decibelPlot };
    if (this.displayDebugImage)
    {
        var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // display the cosine similarity scores
    var plot = new Plot(this.DisplayName, scores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // add names into the returned events
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;
    foreach (var ae in potentialEvents)
    {
        ae.Name = abbreviatedSpeciesName;
        ae.SpeciesName = speciesName;
    }

    return (new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    });
}
/// <summary>
/// Original L. convex recogniser (superseded by Gruntwork2).
/// Pipeline: build a noise-reduced decibel spectrogram; find the dominant frequency bin
/// per frame around the configured DominantFrequency; detect start/end of candidate
/// events; score each by cosine similarity against peak-spacing templates; return events,
/// a hit matrix and score plots.
/// </summary>
/// <param name="audioRecording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer config (thresholds, DominantFrequency, species names).</param>
/// <param name="outputDirectory">Destination for the optional debug image.</param>
/// <param name="segmentStartOffset">Offset of this segment within the whole recording.</param>
internal RecognizerResults Gruntwork1(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    // make a spectrogram
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
    var config = new SonogramConfig
    {
        WindowSize = 512,
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);

    //double epsilon = Math.Pow(0.5, audioRecording.BitsPerSample - 1);
    int frameSize = colCount * 2;
    int frameStep = frameSize; // this default = zero overlap

    //double frameDurationInSeconds = frameSize / (double)sampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    double herzPerBin = sampleRate / 2.0 / colCount;

    //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = 10.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    int minFrameWidth = 3;
    int maxFrameWidth = 5;

    //double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
    //double maxDuration = maxFrameWidth * frameStepInSeconds;

    // minimum number of bins covering frequency bandwidth of L.convex call
    int callBinWidth = 25;
    int silenceBinBuffer = 4;

    // # The Limnodynastes call has a duration of 3-5 frames given the above settings.
    // # The call has three major peaks. The dominant peak is at approx 1850, a value set in the config.
    // # The second and third peak are at equal gaps below: DominantFreq-gap and DominantFreq-(2*gap).
    // # Set the gap in the config file. Should typically be in range 880 to 970.
    // For Limnodynastes convex, in the D.Stewart CD, there are peaks close to: 1950 Hz, 1460 Hz, 970 Hz (490 Hz apart).
    // For Limnodynastes convex, in Kiyomi's JCU recording, there are peaks close to: 1780 Hz, 1330 Hz, 880 Hz (450 Hz apart).
    // So the strategy is to look for three peaks separated by the same amount in the vicinity of the above,
    // starting with highest power (the top peak) and working down to lowest power (bottom peak).
    // To this end we produce two templates, each of length 25, having 2nd and 3rd peaks at different intervals.
    var templates = GetLconvexTemplates(callBinWidth, silenceBinBuffer);

    // NOTE(review): GetIntOrNull presumably returns int?; the (int) cast throws
    // InvalidOperationException if "DominantFrequency" is missing from config — verify.
    int dominantFrequency = (int)configuration.GetIntOrNull("DominantFrequency");

    // NOTE: could give user control over other call features,
    // such as frequency gap between peaks. But not in this first iteration of the recognizer.
    //int peakGapInHerz = (int)configuration["PeakGap"];
    //int minHz = (int)configuration[AnalysisKeys.MinHz];
    //int F1AndF2BinGap = (int)Math.Round(peakGapInHerz / herzPerBin);
    //int F1AndF3BinGap = 2 * F1AndF2BinGap;

    // search band of +/- 250 Hz around the configured dominant frequency
    int hzBuffer = 250;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    //int bandwidth = dominantBinMax - dominantBinMin + 1;

    int[] dominantBins = new int[rowCount]; // predefinition of events max frequency
    double[] scores = new double[rowCount]; // predefinition of score array
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    // mark the hits in hitMatrix
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // loop through bandwidth of L.convex call and look for dominant frequency
        for (int binId = 5; binId < dominantBinMax; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // dominant bin must lie inside the search band
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        scores[s] = maxAmplitude;
        dominantBins[s] = maxId;
    } // loop through all spectra

    // We now have a list of potential hits for LimCon. This needs to be filtered.
    Plot.FindStartsAndEndsOfScoreEvents(scores, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

    // loop through the score array and find beginning and end of potential events
    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin over the event's frames (point.X..point.Y inclusive)
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= dominantBinMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        // find average dominant bin for the event
        // NOTE(review): binCount can be zero if no frame qualified — division by zero risk.
        int avDominantBin = (int)Math.Round(binSum / (double)binCount);
        int avDominantFreq = (int)(Math.Round(binSum / (double)binCount) * herzPerBin);

        // Get score for the event.
        // Use a simple template for the honk and calculate cosine similarity to the template.
        // Template has three dominant frequencies.
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double[] eventAsVector = MatrixTools.SumColumns(eventMatrix);
        GetEventScore(eventAsVector, templates, out double eventScore, out int templateId);

        // put hits into hits matrix
        // put cosine score into the score array
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        if (eventScore < similarityThreshold)
        {
            continue;
        }

        int topBinForEvent = avDominantBin + 2;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            // name encodes which template matched best
            Name = "L.c" + templateId,
            DominantFreq = avDominantFreq,
            Score = eventScore,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);
    var debugPlots = new List<Plot> { debugPlot };
    if (this.displayDebugImage)
    {
        Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // display the cosine similarity scores
    var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // add names into the returned events
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;
    foreach (var ae in potentialEvents)
    {
        ae.Name = abbreviatedSpeciesName;
        ae.SpeciesName = speciesName;
    }

    return (new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    });
}
/// <summary>
/// Algorithm2:
/// 1: Loop through spgm and find dominant freq bin and its amplitude in each frame.
/// 2: If frame passes amplitude test, then calculate a similarity cosine score for that frame wrt a template matrix.
/// 3: If similarity score exceeds threshold, then assign event score based on the amplitude.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer config (thresholds, DominantFrequency).</param>
/// <param name="outputDirectory">Destination for the optional debug image.</param>
/// <param name="segmentStartOffset">Offset of this segment within the whole recording.</param>
internal RecognizerResults Algorithm2(AudioRecording recording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
        WindowOverlap = 0.0,
    };

    // now construct the standard decibel spectrogram WITH noise removal
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram) new SpectrogramStandard(config, recording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;
    int sampleRate = recording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);

    //double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    int frameSize = colCount * 2;
    int frameStep = frameSize; // this default = zero overlap

    //double frameDurationInSeconds = frameSize / (double)sampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    double herzPerBin = sampleRate / 2.0 / colCount;

    //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    //int minFrameWidth = 2;
    //int maxFrameWidth = 5; // this is larger than actual to accomodate an echo.
    //double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
    //double maxDuration = maxFrameWidth * frameStepInSeconds;

    // minimum number of frames and bins covering the call.
    // The PlatyplectrumOrnatum call has a duration of 3-5 frames GIVEN THE ABOVE SAMPLING and WINDOW SETTINGS!
    // Get the call templates and their dimensions
    var templates = GetTemplatesForAlgorithm2(out var callFrameDuration, out var callBinWidth);

    int dominantFrequency = configuration.GetInt("DominantFrequency");
    const int hzBuffer = 100;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    // template bins span [bottomBin, topBin]
    int bottomBin = 1;
    int topBin = bottomBin + callBinWidth - 1;
    int[] dominantBins = new int[rowCount]; // predefinition of events max frequency
    double[] similarityScores = new double[rowCount]; // predefinition of score array
    double[] amplitudeScores = new double[rowCount];
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram.
    // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns = freq bins.
    // Mark the hits in hitMatrix.
    for (int s = 1; s < rowCount - callFrameDuration; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // loop through bandwidth of call and look for dominant frequency
        for (int binId = 8; binId <= dominantBinMax; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // dominant bin must lie inside the search band
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        // now calculate similarity with template
        var locality = MatrixTools.Submatrix(spg, s - 1, bottomBin, s + callFrameDuration - 2, topBin); // s-1 because first row of template is zeros.
        int localMaxBin = maxId - bottomBin;
        double callAmplitude = (locality[1, localMaxBin] + locality[2, localMaxBin] + locality[3, localMaxBin]) / 3.0;

        // use the following lines to write out call templates for use as recognizer
        //double[] columnSums = MatrixTools.SumColumns(locality);
        //if (columnSums[maxId - bottomBin] < 80) continue;
        //FileTools.WriteMatrix2File(locality, "E:\\SensorNetworks\\Output\\Frogs\\TestOfRecognizers-2016October\\Towsey.PlatyplectrumOrnatum\\Locality_S"+s+".csv");
        double score = DataTools.CosineSimilarity(locality, templates[0]);
        if (score > eventSimilarityThreshold)
        {
            similarityScores[s] = score;
            dominantBins[s] = maxId;
            amplitudeScores[s] = callAmplitude;
        }
    } // loop through all spectra

    // loop through all spectra/rows of the spectrogram for a second time.
    // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns = freq bins.
    // We now have a list of potential hits. This needs to be filtered. Mark the hits in hitMatrix.
    var events = new List<AcousticEvent>();
    for (int s = 1; s < rowCount - callFrameDuration; s++)
    {
        // find peaks in the array of similarity scores. First step, only look for peaks.
        if (similarityScores[s] < similarityScores[s - 1] || similarityScores[s] < similarityScores[s + 1])
        {
            continue;
        }

        // require three consecutive similarity scores to be above the threshold
        if (similarityScores[s + 1] < eventSimilarityThreshold || similarityScores[s + 2] < eventSimilarityThreshold)
        {
            continue;
        }

        // now check the amplitude
        if (amplitudeScores[s] < eventDecibelThreshold)
        {
            continue;
        }

        // have an event
        // find average dominant bin for the event
        // NOTE(review): the three summed terms are all dominantBins[s], so this "average"
        // is just dominantBins[s] — possibly intended to be s-1, s, s+1; confirm.
        int avDominantBin = (dominantBins[s] + dominantBins[s] + dominantBins[s]) / 3;
        int avDominantFreq = (int)Math.Round(avDominantBin * herzPerBin);
        int topBinForEvent = avDominantBin + 3;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);
        hits[s, avDominantBin] = 10;

        double startTime = s * frameStepInSeconds;
        double durationTime = 4 * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            DominantFreq = avDominantFreq,
            Score = amplitudeScores[s],

            // remove name because it hides spectral content in display of the event.
            Name = string.Empty,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        events.Add(newEvent);
    } // loop through all spectra

    // display the amplitude scores
    DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
    var plot = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display the original decibel score array
        var debugPlot = new Plot("Similarity Score", similarityScores, eventSimilarityThreshold);
        var debugPlots = new List<Plot> { plot, debugPlot };
        var debugImage = DisplayDebugImage(sonogram, events, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // add names into the returned events
    // NOTE(review): hard-coded abbreviation; the commented alternative suggests this
    // should perhaps use abbreviatedSpeciesName from config — confirm.
    foreach (var ae in events)
    {
        ae.Name = "P.o"; // abbreviatedSpeciesName;
    }

    return (new RecognizerResults()
    {
        Events = events,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    });
}
/// <summary>
/// Algorithm1: detects candidate Platyplectrum ornatum calls in one audio segment.
/// Pipeline: (1) build a noise-reduced decibel spectrogram; (2) for each frame, find the dominant
/// frequency bin and keep frames whose peak lies near the configured DominantFrequency and exceeds
/// a dB threshold; (3) group consecutive hits into candidate events; (4) score each candidate by
/// cosine similarity against a call template and keep those above the similarity threshold.
/// </summary>
/// <param name="audioRecording">The audio segment to analyse.</param>
/// <param name="configuration">Reads BgNoiseThreshold, EventDecibelThreshold, EventSimilarityThreshold, DominantFrequency.</param>
/// <param name="outputDirectory">Used only to write the optional debug spectrogram image.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>RecognizerResults containing events, hit matrix, score plot and the sonogram.</returns>
internal RecognizerResults Algorithm1(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
        WindowOverlap = 0.0,
    };

    // now construct the standard decibel spectrogram WITH noise removal
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);  // rows = spectra (time frames); spg is rotated to vertical
    int colCount = spg.GetLength(1);  // columns = frequency bins (DC column already removed)

    // double epsilon = Math.Pow(0.5, audioRecording.BitsPerSample - 1);
    int frameSize = colCount * 2;
    int frameStep = frameSize; // this default = zero overlap

    // double frameDurationInSeconds = frameSize / (double)sampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // Nyquist frequency divided by the number of bins.
    double herzPerBin = sampleRate / 2 / (double)colCount;

    // string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    // string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    // NOTE(review): the SonogramConfig above sets WindowSize = 256, not 512 — confirm which is intended.
    int minFrameWidth = 2;
    int maxFrameWidth = 5; // this is larger than actual to accommodate an echo.

    // double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
    // double maxDuration = maxFrameWidth * frameStepInSeconds;

    // minimum number of bins covering frequency bandwidth of call
    int callBinWidth = 19;

    // # The PlatyplectrumOrnatum call has a duration of 3-5 frames given the above settings.
    // To this end we produce two templates.
    var templates = GetTemplatesForAlgorithm1(callBinWidth);

    int dominantFrequency = configuration.GetInt("DominantFrequency");

    // NOTE: could give user control over other call features
    // Such as frequency gap between peaks. But not in this first iteration of the recognizer.
    //int peakGapInHerz = (int)configuration["PeakGap"];
    //int minHz = (int)configuration[AnalysisKeys.MinHz];
    //int F1AndF2BinGap = (int)Math.Round(peakGapInHerz / herzPerBin);
    //int F1AndF3BinGap = 2 * F1AndF2BinGap;

    // Accept a dominant-frequency peak anywhere within +/- 100 Hz of the configured frequency.
    int hzBuffer = 100;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    // int bandwidth = dominantBinMax - dominantBinMin + 1;

    int[] dominantBins = new int[rowCount]; // predefinition of events max frequency
    double[] amplitudeScores = new double[rowCount]; // predefinition of amplitude score array
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    // mark the hits in hitMatrix
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // loop through bandwidth of call and look for dominant frequency
        // (starts at bin 5 to skip low-frequency noise below the search band)
        for (int binId = 5; binId < dominantBinMax; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // reject frame if the peak falls below the acceptable dominant-frequency band
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        amplitudeScores[s] = maxAmplitude;
        dominantBins[s] = maxId;

        // Console.WriteLine("Col {0}, Bin {1}  ", c, freqBinID);
    } // loop through all spectra

    // We now have a list of potential hits. This needs to be filtered.
    Plot.FindStartsAndEndsOfScoreEvents(amplitudeScores, eventDecibelThreshold, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

    // loop through the score array and find beginning and end of potential events
    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin over the event frames (point.X..point.Y inclusive)
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= dominantBinMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        // find average dominant bin for the event
        int avDominantBin = (int)Math.Round(binSum / (double)binCount);
        int avDominantFreq = (int)(Math.Round(binSum / (double)binCount) * herzPerBin);

        // Get score for the event.
        // Use a simple template for the honk and calculate cosine similarity to the template.
        // Template has three dominant frequenices.
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double eventScore = GetEventScore(eventMatrix, templates);

        // put hits into hits matrix
        // put cosine score into the score array
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        // discard candidates whose template similarity is too low
        if (eventScore < eventSimilarityThreshold)
        {
            continue;
        }

        // convert bin bounds of the call band to Hertz for the event record
        int topBinForEvent = avDominantBin + 2;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            DominantFreq = avDominantFreq,
            Score = eventScore,

            // remove name because it hides spectral content in display of the event.
            Name = string.Empty,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        potentialEvents.Add(newEvent);
    } // calculate the cosine similarity scores

    var plot = new Plot(this.DisplayName, prunedScores, eventSimilarityThreshold);
    var plots = new List<Plot> { plot };

    //DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display the original decibel score array
        DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var debugPlot = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { debugPlot, plot };
        var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // add names into the returned events
    foreach (var ae in potentialEvents)
    {
        ae.Name = "P.o"; // abbreviatedSpeciesName;
    }

    return (new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    });
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Runs the Oscillations2012 oscillation detector over the configured frequency band of a
/// noise-reduced spectrogram and converts the smoothed oscillation scores into acoustic events.
/// </summary>
/// <param name="recording">The audio segment; must be sampled at 22050 Hz.</param>
/// <param name="configuration">Supplies band limits, DCT parameters, duration bounds and thresholds.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Unused by this recognizer.</param>
/// <param name="outputDirectory">Used only to write the optional debug spectrogram image.</param>
/// <param name="imageWidth">Unused by this recognizer.</param>
/// <returns>RecognizerResults containing the sonogram, hit matrix, score plot and events.</returns>
/// <exception cref="InvalidOperationException">Thrown when the recording is not 22050 Hz.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // BETTER TO CALCULATE THIS. IGNORE USER!
    // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

    // duration of DCT in seconds
    double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration);

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    // The default was 512 for Canetoad.
    // Framesize = 128 seems to work for Littoria fallax.
    // frame size
    int frameSize = configuration.GetInt(AnalysisKeys.KeyFrameSize);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // the overlap is derived from the frame size and the highest oscillation rate to be detected
    double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
        recording.SampleRate,
        frameSize,
        maxOscilFreq);

    //windowOverlap = 0.75; // previous default

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.2,
    };

    TimeSpan recordingDuration = recording.Duration;
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER

    // This window is used to smooth the score array before extracting events.
    // A short window preserves sharper score edges to define events but also keeps noise.
    int scoreSmoothingWindow = 5;
    Oscillations2012.Execute(
        (SpectrogramStandard)sonogram,
        minHz,
        maxHz,
        dctDuration,
        minOscilFreq,
        maxOscilFreq,
        dctThreshold,
        eventThreshold,
        minDuration,
        maxDuration,
        scoreSmoothingWindow,
        out var scores,
        out var acousticEvents,
        out var hits,
        segmentStartOffset);

    // stamp segment/species metadata onto every detected event
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, scores, eventThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        Image debugImage = DisplayDebugImage(sonogram, acousticEvents, plots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plots,
        Events = acousticEvents,
    };
}
// OTHER CONSTANTS
//private const string ImageViewer = @"C:\Windows\system32\mspaint.exe";

/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Delegates the detection work to the Analysis() helper and repackages its tuple result,
/// stamping species/segment metadata onto each predicted event.
/// </summary>
/// <param name="recording">The audio segment; must be sampled at 22050 Hz.</param>
/// <param name="configuration">Raw config, parsed into a LitoriaBicolorConfig.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Unused by this recognizer.</param>
/// <param name="outputDirectory">Used only to write the optional debug spectrogram image.</param>
/// <param name="imageWidth">Unused by this recognizer.</param>
/// <returns>RecognizerResults, or null when Analysis() produced nothing to process.</returns>
/// <exception cref="InvalidOperationException">Thrown when the recording is not 22050 Hz.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaBicolorConfig();
    recognizerConfig.ReadConfigFile(configuration);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    TimeSpan recordingDuration = recording.WavReader.Time;

    //// ignore oscillations below this threshold freq
    //int minOscilFreq = (int)configuration[AnalysisKeys.MinOscilFreq];
    //// ignore oscillations above this threshold freq
    int maxOscilRate = (int)Math.Ceiling(1 / recognizerConfig.MinPeriod);

    // this default framesize seems to work
    const int frameSize = 128;
    double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
        recording.SampleRate,
        frameSize,
        maxOscilRate);

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,

        //set default values - ignore those set by user
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    //#############################################################################################################################################
    //DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    var results = Analysis(recording, sonoConfig, recognizerConfig, MainEntry.InDEBUG, segmentStartOffset);

    //######################################################################

    if (results == null)
    {
        return null; //nothing to process
    }

    var sonogram = results.Item1;
    var hits = results.Item2;
    var scoreArray = results.Item3;
    var predictedEvents = results.Item4;
    var debugImage = results.Item5;

    //#############################################################################################################################################

    // FIX: Analysis() is passed MainEntry.InDEBUG, so the debug image is presumably null outside
    // debug runs — TODO confirm against Analysis(). Guard the save to avoid a NullReferenceException
    // and to avoid writing debug output unconditionally in deployment.
    if (debugImage != null)
    {
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.SpeciesName, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // Prune events here if required i.e. remove those below threshold score if this not already done. See other recognizers.
    foreach (AcousticEvent ae in predictedEvents)
    {
        // add additional info
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        ae.SpeciesName = recognizerConfig.SpeciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
    }

    var plot = new Plot(this.DisplayName, scoreArray, recognizerConfig.EventThreshold);

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = predictedEvents,
    };
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects oscillating calls by sliding a DCT over the band-averaged decibel signal:
/// high-pass filters the band energy, takes a DCT on windows that exceed the decibel threshold,
/// keeps windows whose oscillation period and intensity fall within the configured bounds,
/// and converts the resulting score array into acoustic events.
/// </summary>
/// <param name="recording">The audio segment; must be sampled at 22050 Hz.</param>
/// <param name="configuration">Supplies band limits, oscillation bounds, duration bounds and thresholds.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Unused by this recognizer.</param>
/// <param name="outputDirectory">Used only to write the optional debug spectrogram image.</param>
/// <param name="imageWidth">Unused by this recognizer.</param>
/// <returns>RecognizerResults with the sonogram, score plot and events; Hits is always null here.</returns>
/// <exception cref="InvalidOperationException">Thrown when the recording is not 22050 Hz.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    const int frameSize = 256;
    const double windowOverlap = 0.0;

    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // duration of DCT in seconds
    //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // minimum decibel level in the call band required to look for oscillations
    double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    // NB: with zero overlap, frames/sec = sr / frameSize, numerically equal to freqBinWidth here.
    double framesPerSecond = freqBinWidth;
    double minPeriod = 1 / (double)maxOscilFreq;
    double maxPeriod = 1 / (double)minOscilFreq;
    double dctDuration = 5 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    // average decibel energy within the call band, one value per frame
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // remove baseline from amplitude array
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // remove hi freq content from amplitude array
    var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

    // slide a DCT window along the high-pass signal in steps of 2 frames,
    // looking for oscillations of acceptable period and intensity
    var dctScores = new double[highPassFilteredSignal.Length];
    const int step = 2;

    for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
    {
        // skip quiet frames
        if (highPassFilteredSignal[i] < decibelThreshold)
        {
            continue;
        }

        double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

        // Look for oscillations in the highPassFilteredSignal
        Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
        bool periodWithinBounds = period > minPeriod && period < maxPeriod;

        if (!periodWithinBounds)
        {
            continue;
        }

        if (intensity < dctThreshold)
        {
            continue;
        }

        //lay down score for sample length
        // keep the max intensity seen at each frame, but only where the smoothed signal is loud enough
        for (int j = 0; j < dctLength; j++)
        {
            if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
            {
                dctScores[i + j] = intensity;
            }
        }
    }

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // ######################################################################

    // stamp segment/species metadata onto every detected event
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, dctScores, eventThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
        DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);
        DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { ampltdPlot, lowPassPlot, demeanedPlot, plot };
        Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    return (new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = null,
        Plots = plots,
        Events = acousticEvents,
    });
}