/// <summary>
/// Scores an event against a set of templates and returns the best cosine similarity found.
/// The event matrix is reduced to a vector with <c>MatrixTools.SumColumns</c> before comparison.
/// </summary>
/// <param name="eventMatrix">The matrix of values describing the event.</param>
/// <param name="templates">The candidate templates the event is compared with.</param>
/// <returns>
/// The largest similarity returned by <c>DataTools.CosineSimilarity</c> over all templates.
/// When no template yields a larger value (e.g. the list is empty) the initial value, the most negative double, is returned.
/// </returns>
public static double GetEventScore(double[,] eventMatrix, List<double[]> templates)
{
    // The template starts at the high freq end, which is the fixed reference bin,
    // so the summed vector has to be reversed before it lines up with a template.
    double[] reversedProfile = DataTools.reverseArray(MatrixTools.SumColumns(eventMatrix));

    double best = double.MinValue;
    for (int index = 0; index < templates.Count; index++)
    {
        double similarity = DataTools.CosineSimilarity(templates[index], reversedProfile);
        if (similarity > best)
        {
            best = similarity;
        }
    }

    return best;
}
/// <summary>
/// Finds the template that best matches an event vector, using cosine similarity.
/// </summary>
/// <param name="eventAsVector">The event expressed as a vector. It is reversed before comparison; the caller's array reference is not reassigned.</param>
/// <param name="templates">The candidate templates the event is compared with.</param>
/// <param name="score">
/// The square of the best cosine similarity, or 0.0 when no template produced a winning score
/// (empty template list, or no similarity compared greater than the initial minimum).
/// </param>
/// <param name="id">The 1-based index of the best matching template, or 0 when there was no winner.</param>
private static void GetEventScore(double[] eventAsVector, List<double[]> templates, out double score, out int id)
{
    // need to reverse vector because template starts at the high freq end which is the fixed reference bin.
    double[] reversedVector = DataTools.reverseArray(eventAsVector);

    double maxScore = -double.MaxValue;
    id = 0;
    for (int i = 0; i < templates.Count; i++)
    {
        double eventScore = DataTools.CosineSimilarity(templates[i], reversedVector);
        if (maxScore < eventScore)
        {
            maxScore = eventScore;
            id = i + 1; // template ids are 1-based; 0 is reserved for "no match"
        }
    }

    if (id == 0)
    {
        // No template won. Squaring the -double.MaxValue sentinel would overflow to +Infinity,
        // which would look like a perfect match, so report a zero score instead.
        score = 0.0;
        return;
    }

    // square the score to increase score contrast
    score = maxScore * maxScore;
}
/// <summary>
/// Algorithm2:
/// 1: Loop through the spectrogram and find the dominant freq bin and its amplitude in each frame.
/// 2: If the frame passes the amplitude test, then calculate a cosine similarity score for that frame. The similarity score is wrt a template matrix.
/// 3: If the similarity score exceeds the threshold, then assign an event score based on the amplitude.
/// </summary>
/// <param name="recording">The audio recording; its WavReader and SampleRate are used to build and scale the spectrogram.</param>
/// <param name="configuration">
/// Supplies "BgNoiseThreshold" (default 0.1), "EventDecibelThreshold" (default 6.0),
/// "EventSimilarityThreshold" (default 0.2) and "DominantFrequency" (read with GetInt, no default given here).
/// </param>
/// <param name="outputDirectory">Directory in which the debug spectrogram image is saved when MainEntry.InDEBUG is set.</param>
/// <param name="segmentStartOffset">Start offset of this segment; passed to every AcousticEvent that is created.</param>
/// <returns>The events found, the hits matrix, the normalised amplitude score plot and the spectrogram (DC column removed).</returns>
internal RecognizerResults Algorithm2(AudioRecording recording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
        WindowOverlap = 0.0,
    };

    // now construct the standard decibel spectrogram WITH noise removal
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, recording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;

    // get frame parameters for the analysis
    int sampleRate = recording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);

    int frameSize = colCount * 2;
    int frameStep = frameSize; // this default = zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    double herzPerBin = sampleRate / 2.0 / colCount;

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

    // IMPORTANT: The original notes on frame durations assume a sampling rate = 22050 and window size of 512.
    // The PlatyplectrumOrnatum call has a duration of 3-5 frames GIVEN THOSE SAMPLING and WINDOW SETTINGS!
    // NOTE(review): the spectrogram above is built with WindowSize = 256, not 512 - confirm which setting the templates assume.

    // Get the call templates and their dimensions
    var templates = GetTemplatesForAlgorithm2(out var callFrameDuration, out var callBinWidth);

    int dominantFrequency = configuration.GetInt("DominantFrequency");
    const int hzBuffer = 100;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;
    int bottomBin = 1;
    int topBin = bottomBin + callBinWidth - 1;

    int[] dominantBins = new int[rowCount]; // dominant freq bin of each candidate frame
    double[] similarityScores = new double[rowCount]; // template similarity of each candidate frame
    double[] amplitudeScores = new double[rowCount]; // call amplitude of each candidate frame
    double[,] hits = new double[rowCount, colCount];

    // FIRST PASS: loop through all spectra/rows of the spectrogram and score candidate frames.
    // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns = freq bins.
    for (int s = 1; s < rowCount - callFrameDuration; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // loop through bandwidth of call and look for dominant frequency
        // NOTE(review): the lower search bound is hard-coded to bin 8 - confirm this is intended.
        for (int binId = 8; binId <= dominantBinMax; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // the strongest bin must lie inside the band around the dominant frequency
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        // now calculate similarity with template
        // s - 1 because first row of template is zeros.
        var locality = MatrixTools.Submatrix(spg, s - 1, bottomBin, s + callFrameDuration - 2, topBin);
        int localMaxBin = maxId - bottomBin;
        double callAmplitude = (locality[1, localMaxBin] + locality[2, localMaxBin] + locality[3, localMaxBin]) / 3.0;

        // To produce new call templates for a recognizer, write the locality matrix
        // (e.g. with FileTools.WriteMatrix2File) to a csv file at this point.
        double score = DataTools.CosineSimilarity(locality, templates[0]);
        if (score > eventSimilarityThreshold)
        {
            similarityScores[s] = score;
            dominantBins[s] = maxId;
            amplitudeScores[s] = callAmplitude;
        }
    } // loop through all spectra

    // SECOND PASS: we now have a list of potential hits which needs to be filtered.
    // Mark the surviving hits in the hits matrix and turn them into events.
    var events = new List<AcousticEvent>();
    for (int s = 1; s < rowCount - callFrameDuration; s++)
    {
        // find peaks in the array of similarity scores. First step, only look for peaks
        if (similarityScores[s] < similarityScores[s - 1] || similarityScores[s] < similarityScores[s + 1])
        {
            continue;
        }

        // require three consecutive similarity scores to be above the threshold
        if (similarityScores[s + 1] < eventSimilarityThreshold || similarityScores[s + 2] < eventSimilarityThreshold)
        {
            continue;
        }

        // now check the amplitude
        if (amplitudeScores[s] < eventDecibelThreshold)
        {
            continue;
        }

        // have an event
        // Find the average dominant bin over the three consecutive frames that passed the tests above.
        // (Previously frame s was summed three times, so no averaging actually took place.)
        // NOTE(review): with a non-positive similarity threshold, frames s + 1 / s + 2 could carry an unset (0) bin - confirm thresholds are always > 0.
        int avDominantBin = (dominantBins[s] + dominantBins[s + 1] + dominantBins[s + 2]) / 3;
        int avDominantFreq = (int)Math.Round(avDominantBin * herzPerBin);
        int topBinForEvent = avDominantBin + 3;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

        hits[s, avDominantBin] = 10;

        double startTime = s * frameStepInSeconds;
        double durationTime = 4 * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            DominantFreq = avDominantFreq,
            Score = amplitudeScores[s],

            // remove name because it hides spectral content in display of the event.
            Name = string.Empty,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);

        events.Add(newEvent);
    } // loop through all spectra

    // display the amplitude scores
    DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
    var plot = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display the similarity score array alongside the amplitude plot
        var debugPlot = new Plot("Similarity Score", similarityScores, eventSimilarityThreshold);
        var debugPlots = new List<Plot> { plot, debugPlot };
        var debugImage = DisplayDebugImage(sonogram, events, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // add names into the returned events (they were left empty above so the debug image stays readable)
    foreach (var ae in events)
    {
        ae.Name = "P.o"; // abbreviatedSpeciesName;
    }

    return new RecognizerResults()
    {
        Events = events,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// Performs the analysis for one audio segment. This method is called once per segment (typically one-minute segments).
/// Amplitude peaks inside the configured frequency band are located, the neighbourhood of each peak is compared
/// with a start or end template using cosine similarity, and the resulting score array is converted to acoustic events.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="configuration">The recognizer settings (frequency band, thresholds, event durations, names).</param>
/// <param name="segmentStartOffset">Start offset of this segment; passed on to the generated events.</param>
/// <param name="getSpectralIndexes">Not used by this method.</param>
/// <param name="outputDirectory">Not used by this method.</param>
/// <param name="imageWidth">Not used by this method.</param>
/// <returns>The spectrogram, the hits matrix, a plot of the unsmoothed scores and the accepted events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    // ---- settings taken from the configuration ----
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
    double decibelThreshold = configuration.GetDouble("DecibelThreshold");

    // shortest and longest permitted event, in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);
    var maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // lowest score that still counts as an event
    var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    // ---- i: MAKE SONOGRAM ----
    // The frame size and overlap are fixed here and are not taken from the user's configuration.
    // The default window is HAMMING.
    const int frameSize = 2048;
    double windowOverlap = 0.0;
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    /*
     * Reference table carried over from the original notes:
     * window   sr      frameDuration  frames/sec  hz/bin   64frameDuration  hz/64bins  hz/128bins
     * 1024     22050   46.4ms         21.5        21.5     2944ms           1376hz     2752hz
     * 1024     17640   58.0ms         17.2        17.2     3715ms           1100hz     2200hz
     * 2048     17640   116.1ms        8.6         8.6      7430ms           551hz      1100hz
     * 2048     22050   92.8ms         21.5        10.7666  1472ms
     */
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // one average amplitude per frame, taken over the bins minBin..maxBin
    var frameAmplitudes = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);
    bool[] isPeak = new bool[rowCount];
    var scores = new double[rowCount];
    var hits = new double[rowCount, colCount];

    const int maxTemplateLength = 20;
    const int templateEndPadding = 7;
    const int templateOffset = 14;
    const int minimumGap = 4;
    const int maximumGap = 100;

    // ---- ii: LOCATE AMPLITUDE PEAKS ----
    // A peak is a frame at or above the decibel threshold that is strictly louder than both neighbours.
    for (int frame = 1; frame < frameAmplitudes.Length - 1; frame++)
    {
        double current = frameAmplitudes[frame];
        if (current < decibelThreshold)
        {
            continue;
        }

        bool louderThanPrevious = current > frameAmplitudes[frame - 1];
        bool louderThanNext = current > frameAmplitudes[frame + 1];
        if (louderThanPrevious && louderThanNext)
        {
            isPeak[frame] = true;
        }
    }

    // template for the end of the call
    var endTemplate = GetEndTemplateForAlgorithm2();

    // ---- iii: SCORE PEAKS THAT ARE THE CORRECT DISTANCE APART ----
    int searchLimit = frameAmplitudes.Length - maxTemplateLength - templateEndPadding;
    for (int i = 2; i < searchLimit; i++)
    {
        if (!isPeak[i])
        {
            continue;
        }

        // gaps that are too small or too large are ignored
        int gap = CalculateDistanceToNextPeak(isPeak, i);
        bool gapIsAcceptable = gap >= minimumGap && gap <= maximumGap;
        if (!gapIsAcceptable)
        {
            continue;
        }

        if (gap > maxTemplateLength)
        {
            // The next peak is far away, so this peak is tested as the end of a call.
            // The call ends with a rising whip, which is what the end template describes.
            int windowStart = i < templateOffset ? 0 : i - templateOffset;
            var endLocality = DataTools.Subarray(frameAmplitudes, windowStart, endTemplate.Length);
            double endScore = DataTools.CosineSimilarity(endLocality, endTemplate);

            // Raise the scores under the template footprint, which nominally begins templateOffset
            // frames before the peak; positions before the start of the array are skipped.
            int footprintEnd = i - templateOffset + endTemplate.Length;
            for (int position = windowStart; position < footprintEnd; position++)
            {
                if (endScore > scores[position])
                {
                    scores[position] = endScore;
                }
            }

            // clear the scores that follow the peak
            for (int ahead = 2; ahead < maxTemplateLength; ahead++)
            {
                scores[i + ahead] = 0.0;
            }
        }
        else
        {
            // The start template depends on the distance to the next peak.
            var startTemplate = GetTemplateForAlgorithm2(gap, templateEndPadding);

            // The locality begins two frames before the peak because the first two places of the template should be zero.
            var locality = DataTools.Subarray(frameAmplitudes, i - 2, startTemplate.Length);
            double score = DataTools.CosineSimilarity(locality, startTemplate);

            for (int t = 0; t < startTemplate.Length; t++)
            {
                int position = i + t;
                if (score > scores[position])
                {
                    scores[position] = score;
                    hits[i, minBin] = 10;
                }
            }
        }
    } // loop over peak array

    var smoothedScores = DataTools.filterMovingAverageOdd(scores, 3);

    // ---- iv: CONVERT SCORES TO ACOUSTIC EVENTS ----
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        smoothedScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // Keep only events that are long enough, and attach the species and segment details to them.
    var prunedEvents = new List<AcousticEvent>();
    foreach (var candidate in predictedEvents)
    {
        if (candidate.EventDurationSeconds < minDuration)
        {
            continue;
        }

        candidate.SpeciesName = speciesName;
        candidate.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        candidate.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        candidate.Name = abbreviatedSpeciesName;
        prunedEvents.Add(candidate);
    }

    // The plot shows the unsmoothed scores against the event threshold.
    var plot = new Plot(this.DisplayName, scores, eventThreshold);
    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = prunedEvents,
    };
}