public void TestFreqScaleOnArtificialSignal1() { int sampleRate = 22050; double duration = 20; // signal duration in seconds int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; int windowSize = 512; var freqScale = new FrequencyScale(sampleRate / 2, windowSize, 1000); var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal1_LinearFreqScale.png"); var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); var sonoConfig = new SonogramConfig { WindowSize = freqScale.WindowSize, WindowOverlap = 0.0, SourceFName = "Signal1", NoiseReductionType = NoiseReductionType.Standard, NoiseReductionParameter = 0.12, }; var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); // pick a row, any row var oneSpectrum = MatrixTools.GetRow(sonogram.Data, 40); oneSpectrum = DataTools.filterMovingAverage(oneSpectrum, 5); var peaks = DataTools.GetPeaks(oneSpectrum); for (int i = 5; i < peaks.Length - 5; i++) { if (peaks[i]) { LoggedConsole.WriteLine($"bin ={freqScale.BinBounds[i, 0]}, Herz={freqScale.BinBounds[i, 1]}-{freqScale.BinBounds[i + 1, 1]} "); } } foreach (int h in harmonics) { LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}"); } // spectrogram without framing, annotation etc var image = sonogram.GetImage(); string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={windowSize}"; image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); image.Save(outputImagePath); // Check that image dimensions are correct Assert.AreEqual(861, image.Width); Assert.AreEqual(310, image.Height); Assert.IsTrue(peaks[11]); Assert.IsTrue(peaks[22]); Assert.IsTrue(peaks[45]); Assert.IsTrue(peaks[92]); Assert.IsTrue(peaks[185]); }
public void TestFreqScaleOnArtificialSignal2() { int sampleRate = 64000; double duration = 30; // signal duration in seconds int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000); var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png"); var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); // init the default sonogram config var sonoConfig = new SonogramConfig { WindowSize = freqScale.WindowSize, WindowOverlap = 0.2, SourceFName = "Signal2", NoiseReductionType = NoiseReductionType.None, NoiseReductionParameter = 0.0, }; var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); // pick a row, any row var oneSpectrum = MatrixTools.GetRow(sonogram.Data, 40); oneSpectrum = DataTools.filterMovingAverage(oneSpectrum, 5); var peaks = DataTools.GetPeaks(oneSpectrum); var peakIds = new List <int>(); for (int i = 5; i < peaks.Length - 5; i++) { if (peaks[i]) { int peakId = freqScale.BinBounds[i, 0]; peakIds.Add(peakId); LoggedConsole.WriteLine($"Spectral peak located in bin {peakId}, Herz={freqScale.BinBounds[i, 1]}"); } } foreach (int h in harmonics) { LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}"); } Assert.AreEqual(5, peakIds.Count); Assert.AreEqual(129, peakIds[0]); Assert.AreEqual(257, peakIds[1]); Assert.AreEqual(513, peakIds[2]); Assert.AreEqual(1025, peakIds[3]); Assert.AreEqual(2049, peakIds[4]); var image = sonogram.GetImage(); string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={freqScale.WindowSize}"; image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); image.Save(outputImagePath); // Check that image dimensions are correct Assert.AreEqual(146, image.Width); Assert.AreEqual(310, image.Height); }
/// <summary> /// ################ THE KEY ANALYSIS METHOD for TRILLS /// /// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles. /// </summary> /// <param name="recording"></param> /// <param name="sonoConfig"></param> /// <param name="lwConfig"></param> /// <param name="returnDebugImage"></param> /// <param name="segmentStartOffset"></param> /// <returns></returns> private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis( AudioRecording recording, SonogramConfig sonoConfig, LitoriaWatjulumConfig lwConfig, bool returnDebugImage, TimeSpan segmentStartOffset) { double intensityThreshold = lwConfig.IntensityThreshold; double minDuration = lwConfig.MinDurationOfTrill; // seconds double maxDuration = lwConfig.MaxDurationOfTrill; // seconds double minPeriod = lwConfig.MinPeriod; // seconds double maxPeriod = lwConfig.MaxPeriod; // seconds if (recording == null) { LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible."); return(null); } //i: MAKE SONOGRAM //TimeSpan tsRecordingtDuration = recording.Duration(); int sr = recording.SampleRate; double freqBinWidth = sr / (double)sonoConfig.WindowSize; double framesPerSecond = freqBinWidth; // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period double dctDuration = 4 * maxPeriod; // duration of DCT in frames int dctLength = (int)Math.Round(framesPerSecond * dctDuration); // set up the cosine coefficients double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1; int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1; int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1; int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1; BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); int rowCount = sonogram.Data.GetLength(0); //int colCount = sonogram.Data.GetLength(1); double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin); double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin); //lowerArray = DataTools.filterMovingAverage(lowerArray, 3); //upperArray = DataTools.filterMovingAverage(upperArray, 3); double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray); double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7); // Could smooth here rather than above. Above seemed slightly better? //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7); //differenceScores = DataTools.filterMovingAverage(differenceScores, 7); //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events( amplitudeScores, lwConfig.LowerBandMinHz, lwConfig.UpperBandMaxHz, sonogram.FramesPerSecond, freqBinWidth, lwConfig.DecibelThreshold, minDuration, maxDuration, segmentStartOffset); for (int i = 0; i < differenceScores.Length; i++) { if (differenceScores[i] < 1.0) { differenceScores[i] = 0.0; } } // LOOK FOR TRILL EVENTS // init the score array double[] scores = new double[rowCount]; // var hits = new double[rowCount, colCount]; double[,] hits = null; // init confirmed events var confirmedEvents = new List <AcousticEvent>(); // add names into the returned events foreach (var ae in predictedTrillEvents) { int eventStart = ae.Oblong.RowTop; int eventWidth = ae.Oblong.RowWidth; int step = 2; double maximumIntensity = 0.0; // scan the event to get oscillation period and intensity for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step) { // Look for oscillations in the difference array double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength); double oscilFreq; double period; double intensity; Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity); bool periodWithinBounds = period > minPeriod && period < maxPeriod; //Console.WriteLine($"step={i} period={period:f4}"); if (!periodWithinBounds) { continue; } for (int j = 0; j < dctLength; j++) //lay down score for sample length { if (scores[i + j] < intensity) { scores[i + j] = intensity; } } if (maximumIntensity < intensity) { maximumIntensity = intensity; } } // add abbreviatedSpeciesName into event if (maximumIntensity >= intensityThreshold) { ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}"; ae.Score_MaxInEvent = maximumIntensity; ae.Profile = lwConfig.ProfileNames[0]; confirmedEvents.Add(ae); } } //###################################################################### // LOOK FOR TINK EVENTS // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds // want stronger threshold for tink because brief. double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0; var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events( amplitudeScores, lwConfig.LowerBandMinHz, lwConfig.UpperBandMaxHz, sonogram.FramesPerSecond, freqBinWidth, tinkDecibelThreshold, minDurationOfTink, maxDurationOfTink, segmentStartOffset); foreach (var ae2 in predictedTinkEvents) { // Prune the list of potential acoustic events, for example using Cosine Similarity. //rowtop, rowWidth //int eventStart = ae2.Oblong.RowTop; //int eventWidth = ae2.Oblong.RowWidth; //int step = 2; //double maximumIntensity = 0.0; // add abbreviatedSpeciesName into event //if (maximumIntensity >= intensityThreshold) //{ ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}"; //ae2.Score_MaxInEvent = maximumIntensity; ae2.Profile = lwConfig.ProfileNames[1]; confirmedEvents.Add(ae2); //} } //###################################################################### var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold); Image debugImage = null; if (returnDebugImage) { // display a variety of debug score arrays double[] normalisedScores; double normalisedThreshold; DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold); var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold); DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold); var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold); var debugPlots = new List <Plot> { scorePlot, sumDiffPlot, differencePlot }; debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits); } // return new sonogram because it makes for more easy interpretation of the image var returnSonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = 512, WindowOverlap = 0, // the default window is HAMMING //WindowFunction = WindowFunctions.HANNING.ToString(), //WindowFunction = WindowFunctions.NONE.ToString(), // if do not use noise reduction can get a more sensitive recogniser. //NoiseReductionType = NoiseReductionType.NONE, NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"), }; BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader); return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage)); } //Analysis()
internal RecognizerResults Gruntwork(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset) { double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1; // make a spectrogram var config = new SonogramConfig { WindowSize = 256, NoiseReductionType = NoiseReductionType.Standard, NoiseReductionParameter = noiseReductionParameter, }; config.WindowOverlap = 0.0; // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex // get frame parameters for the analysis var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader); // remove the DC column var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1); int sampleRate = audioRecording.SampleRate; int rowCount = spg.GetLength(0); int colCount = spg.GetLength(1); int frameSize = config.WindowSize; int frameStep = frameSize; // this default = zero overlap double frameStepInSeconds = frameStep / (double)sampleRate; double framesPerSec = 1 / frameStepInSeconds; // reading in variables from the config file string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>"; string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>"; int minHz = configuration.GetInt(AnalysisKeys.MinHz); int maxHz = configuration.GetInt(AnalysisKeys.MaxHz); // ## THREE THRESHOLDS ---- only one of these is given to user. // minimum dB to register a dominant freq peak. After noise removal double peakThresholdDb = 3.0; // The threshold dB amplitude in the dominant freq bin required to yield an event double eventThresholdDb = 6; // minimum score for an acceptable event - that is when processing the score array. double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2; // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 256. int minFrameWidth = 7; int maxFrameWidth = 14; double minDuration = (minFrameWidth - 1) * frameStepInSeconds; double maxDuration = maxFrameWidth * frameStepInSeconds; // Calculate Max Amplitude int binMin = (int)Math.Round(minHz / sonogram.FBinWidth); int binMax = (int)Math.Round(maxHz / sonogram.FBinWidth); int[] dominantBins = new int[rowCount]; // predefinition of events max frequency double[] scores = new double[rowCount]; // predefinition of score array double[,] hits = new double[rowCount, colCount]; // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical. // mark the hits in hitMatrix for (int s = 0; s < rowCount; s++) { double[] spectrum = MatrixTools.GetRow(spg, s); double maxAmplitude = double.MinValue; int maxId = 0; // loop through bandwidth of L.onvex call and look for dominant frequency for (int binID = 5; binID < binMax; binID++) { if (spectrum[binID] > maxAmplitude) { maxAmplitude = spectrum[binID]; maxId = binID; } } if (maxId < binMin) { continue; } // peak should exceed thresold amplitude if (spectrum[maxId] < peakThresholdDb) { continue; } scores[s] = maxAmplitude; dominantBins[s] = maxId; // Console.WriteLine("Col {0}, Bin {1} ", c, freqBinID); } // loop through all spectra // Find average amplitude double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix( sonogram.Data, 0, binMin, rowCount - 1, binMax); var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7); // We now have a list of potential hits for C. tinnula. This needs to be filtered. var startEnds = new List <Point>(); Plot.FindStartsAndEndsOfScoreEvents(highPassFilteredSignal, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out startEnds); // High pass Filter // loop through the score array and find beginning and end of potential events var potentialEvents = new List <AcousticEvent>(); foreach (Point point in startEnds) { // get average of the dominant bin int binSum = 0; int binCount = 0; int eventWidth = point.Y - point.X + 1; for (int s = point.X; s <= point.Y; s++) { if (dominantBins[s] >= binMin) { binSum += dominantBins[s]; binCount++; } } // find average dominant bin for the event int avDominantBin = (int)Math.Round(binSum / (double)binCount); int avDominantFreq = (int)(Math.Round(binSum / (double)binCount) * sonogram.FBinWidth); // Get score for the event. // Use a simple template for the honk and calculate cosine similarity to the template. // Template has three dominant frequenices. // minimum number of bins covering frequency bandwidth of C. tinnula call// minimum number of bins covering frequency bandwidth of L.convex call int callBinWidth = 14; var templates = GetCtinnulaTemplates(callBinWidth); var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1); double eventScore = GetEventScore(eventMatrix, templates); // put hits into hits matrix // put cosine score into the score array for (int s = point.X; s <= point.Y; s++) { hits[s, avDominantBin] = 10; prunedScores[s] = eventScore; } if (eventScore < similarityThreshold) { continue; } int topBinForEvent = avDominantBin + 2; int bottomBinForEvent = topBinForEvent - callBinWidth; double startTime = point.X * frameStepInSeconds; double durationTime = eventWidth * frameStepInSeconds; var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, minHz, maxHz); newEvent.DominantFreq = avDominantFreq; newEvent.Score = eventScore; newEvent.SetTimeAndFreqScales(framesPerSec, sonogram.FBinWidth); newEvent.Name = string.Empty; // remove name because it hides spectral content of the event. potentialEvents.Add(newEvent); } // display the original score array scores = DataTools.normalise(scores); var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold); // DEBUG IMAGE this recognizer only. MUST set false for deployment. bool displayDebugImage = MainEntry.InDEBUG; if (displayDebugImage) { // display a variety of debug score arrays DataTools.Normalise(amplitudeArray, eventThresholdDb, out var normalisedScores, out var normalisedThreshold); var ampltdPlot = new Plot("Average amplitude", normalisedScores, normalisedThreshold); DataTools.Normalise(highPassFilteredSignal, eventThresholdDb, out normalisedScores, out normalisedThreshold); var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold); /* * DataTools.Normalise(scores, eventThresholdDb, out normalisedScores, out normalisedThreshold); * var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold); * * * DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold); * var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold); */ var debugPlots = new List <Plot> { ampltdPlot, demeanedPlot }; Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, null); var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram")); debugImage.Save(debugPath.FullName); } // display the cosine similarity scores var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold); var plots = new List <Plot> { plot }; // add names into the returned events foreach (AcousticEvent ae in potentialEvents) { ae.Name = "speciesName"; // abbreviatedSpeciesName; } return(new RecognizerResults() { Events = potentialEvents, Hits = hits, Plots = plots, Sonogram = sonogram, }); }
/// <summary> /// Do your analysis. This method is called once per segment (typically one-minute segments). /// </summary> /// <param name="audioRecording"></param> /// <param name="configuration"></param> /// <param name="segmentStartOffset"></param> /// <param name="getSpectralIndexes"></param> /// <param name="outputDirectory"></param> /// <param name="imageWidth"></param> /// <returns></returns> public override RecognizerResults Recognize(AudioRecording audioRecording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth) { const double minAmplitudeThreshold = 0.1; const int percentile = 5; const double scoreThreshold = 0.3; const bool doFiltering = true; const int windowWidth = 1024; const int signalBuffer = windowWidth * 2; //string path = @"C:\SensorNetworks\WavFiles\Freshwater\savedfortest.wav"; //audioRecording.Save(path); // this does not work int sr = audioRecording.SampleRate; int nyquist = audioRecording.Nyquist; // Get a value from the config file - with a backup default //int minHz = (int?)configuration[AnalysisKeys.MinHz] ?? 600; // Get a value from the config file - with no default, throw an exception if value is not present //int maxHz = ((int?)configuration[AnalysisKeys.MaxHz]).Value; // Get a value from the config file - without a string accessor, as a double //double someExampleSettingA = (double?)configuration.someExampleSettingA ?? 0.0; // common properties //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>"; //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>"; // min score for an acceptable event double eventThreshold = (double)configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold); // get samples var samples = audioRecording.WavReader.Samples; double[] bandPassFilteredSignal = null; if (doFiltering) { // high pass filter int windowLength = 71; double[] highPassFilteredSignal; DSP_IIRFilter.ApplyMovingAvHighPassFilter(samples, windowLength, out highPassFilteredSignal); //DSP_IIRFilter filter2 = new DSP_IIRFilter("Chebyshev_Highpass_400"); //int order2 = filter2.order; //filter2.ApplyIIRFilter(samples, out highPassFilteredSignal); // Amplify 40dB and clip to +/-1.0; double factor = 100; // equiv to 20dB highPassFilteredSignal = DspFilters.AmplifyAndClip(highPassFilteredSignal, factor); //low pass filter string filterName = "Chebyshev_Lowpass_5000, scale*5"; DSP_IIRFilter filter = new DSP_IIRFilter(filterName); int order = filter.order; //System.LoggedConsole.WriteLine("\nTest " + filterName + ", order=" + order); filter.ApplyIIRFilter(highPassFilteredSignal, out bandPassFilteredSignal); } else // do not filter because already filtered - using Chris's filtered recording { bandPassFilteredSignal = samples; } // calculate an amplitude threshold that is above Nth percentile of amplitudes in the subsample int[] histogramOfAmplitudes; double minAmplitude; double maxAmplitude; double binWidth; int window = 66; Histogram.GetHistogramOfWaveAmplitudes(bandPassFilteredSignal, window, out histogramOfAmplitudes, out minAmplitude, out maxAmplitude, out binWidth); int percentileBin = Histogram.GetPercentileBin(histogramOfAmplitudes, percentile); double amplitudeThreshold = (percentileBin + 1) * binWidth; if (amplitudeThreshold < minAmplitudeThreshold) { amplitudeThreshold = minAmplitudeThreshold; } bool doAnalysisOfKnownExamples = true; if (doAnalysisOfKnownExamples) { // go to fixed location to check //1:02.07, 1:07.67, 1:12.27, 1:12.42, 1:12.59, 1:12.8, 1.34.3, 1:35.3, 1:40.16, 1:50.0, 2:05.9, 2:06.62, 2:17.57, 2:21.0 //2:26.33, 2:43.07, 2:43.15, 3:16.55, 3:35.09, 4:22.44, 4:29.9, 4:42.6, 4:51.48, 5:01.8, 5:21.15, 5:22.72, 5:32.37, 5.36.1, //5:42.82, 6:03.5, 6:19.93, 6:21.55, 6:42.0, 6:42.15, 6:46.44, 7:12.17, 7:42.65, 7:45.86, 7:46.18, 7:52.38, 7:59.11, 8:10.63, //8:14.4, 8:14.63, 8_15_240, 8_46_590, 8_56_590, 9_25_77, 9_28_94, 9_30_5, 9_43_9, 10_03_19, 10_24_26, 10_24_36, 10_38_8, //10_41_08, 10_50_9, 11_05_13, 11_08_63, 11_44_66, 11_50_36, 11_51_2, 12_04_93, 12_10_05, 12_20_78, 12_27_0, 12_38_5, //13_02_25, 13_08_18, 13_12_8, 13_25_24, 13_36_0, 13_50_4, 13_51_2, 13_57_87, 14_15_00, 15_09_74, 15_12_14, 15_25_79 //double[] times = { 2.2, 26.589, 29.62 }; //double[] times = { 2.2, 3.68, 10.83, 24.95, 26.589, 27.2, 29.62 }; //double[] times = { 2.2, 3.68, 10.83, 24.95, 26.589, 27.2, 29.62, 31.39, 62.1, 67.67, 72.27, 72.42, 72.59, 72.8, 94.3, 95.3, // 100.16, 110.0, 125.9, 126.62, 137.57, 141.0, 146.33, 163.07, 163.17, 196.55, 215.09, 262.44, 269.9, 282.6, // 291.48, 301.85, 321.18, 322.72, 332.37, 336.1, 342.82, 363.5, 379.93, 381.55, 402.0, 402.15, 406.44, 432.17, // 462.65, 465.86, 466.18, 472.38, 479.14, 490.63, 494.4, 494.63, 495.240, 526.590, 536.590, 565.82, 568.94, // 570.5, 583.9, 603.19, 624.26, 624.36, 638.8, 641.08, 650.9, 65.13, 68.63, 704.66, // 710.36, 711.2, 724.93, 730.05, 740.78, 747.05, 758.5, 782.25, 788.18, 792.8, // 805.24, 816.03, 830.4, 831.2, 837.87, 855.02, 909.74, 912.14, 925.81 }; var filePath = new FileInfo(@"C:\SensorNetworks\WavFiles\Freshwater\GruntSummaryRevisedAndEditedByMichael.csv"); List <CatFishCallData> data = Csv.ReadFromCsv <CatFishCallData>(filePath, true).ToList(); //var catFishCallDatas = data as IList<CatFishCallData> ?? data.ToList(); int count = data.Count(); var subSamplesDirectory = outputDirectory.CreateSubdirectory("testSubsamples_5000LPFilter"); //for (int t = 0; t < times.Length; t++) foreach (var fishCall in data) { //Image bmp1 = IctalurusFurcatus.AnalyseLocation(bandPassFilteredSignal, sr, times[t], windowWidth); // use following line where using time in seconds //int location = (int)Math.Round(times[t] * sr); //assume location points to start of grunt //double[] subsample = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 2 * signalBuffer); // use following line where using sample int location1 = fishCall.Sample / 2; //assume Chris's sample location points to centre of grunt. Divide by 2 because original recording was 44100. int location = (int)Math.Round(fishCall.TimeSeconds * sr); //assume location points to centre of grunt double[] subsample = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 2 * signalBuffer); // calculate an amplitude threshold that is above 95th percentile of amplitudes in the subsample //int[] histogramOfAmplitudes; //double minAmplitude; //double maxAmplitude; //double binWidth; //int window = 70; //int percentile = 90; //Histogram.GetHistogramOfWaveAmplitudes(subsample, window, out histogramOfAmplitudes, out minAmplitude, out maxAmplitude, out binWidth); //int percentileBin = Histogram.GetPercentileBin(histogramOfAmplitudes, percentile); //double amplitudeThreshold = (percentileBin + 1) * binWidth; //if (amplitudeThreshold < minAmplitudeThreshold) amplitudeThreshold = minAmplitudeThreshold; double[] scores1 = AnalyseWaveformAtLocation(subsample, amplitudeThreshold, scoreThreshold); string title1 = $"scores={fishCall.Timehms}"; Image bmp1 = GraphsAndCharts.DrawGraph(title1, scores1, subsample.Length, 300, 1); //bmp1.Save(path1.FullName); string title2 = $"tStart={fishCall.Timehms}"; Image bmp2 = GraphsAndCharts.DrawWaveform(title2, subsample, 1); var path1 = subSamplesDirectory.CombineFile($"scoresForTestSubsample_{fishCall.TimeSeconds}secs.png"); //var path2 = subSamplesDirectory.CombineFile($@"testSubsample_{times[t]}secs.wav.png"); Image[] imageList = { bmp2, bmp1 }; Image bmp3 = ImageTools.CombineImagesVertically(imageList); bmp3.Save(path1.FullName); //write wave form to txt file for later work in XLS //var path3 = subSamplesDirectory.CombineFile($@"testSubsample_{times[t]}secs.wav.csv"); //signalBuffer = 800; //double[] subsample2 = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 3 * signalBuffer); //FileTools.WriteArray2File(subsample2, path3.FullName); } } int signalLength = bandPassFilteredSignal.Length; // count number of 1000 sample segments int blockLength = 1000; int blockCount = signalLength / blockLength; int[] indexOfMax = new int[blockCount]; double[] maxInBlock = new double[blockCount]; for (int i = 0; i < blockCount; i++) { double max = -2.0; int blockStart = blockLength * i; for (int s = 0; s < blockLength; s++) { double absValue = Math.Abs(bandPassFilteredSignal[blockStart + s]); if (absValue > max) { max = absValue; maxInBlock[i] = max; indexOfMax[i] = blockStart + s; } } } // transfer max values to a list var indexList = new List <int>(); for (int i = 1; i < blockCount - 1; i++) { // only find the blocks that contain a max value that is > neighbouring blocks if (maxInBlock[i] > maxInBlock[i - 1] && maxInBlock[i] > maxInBlock[i + 1]) { indexList.Add(indexOfMax[i]); } //ALTERNATIVELY // look at max in each block //indexList.Add(indexOfMax[i]); } // now process neighbourhood of each max int binCount = windowWidth / 2; FFT.WindowFunc wf = FFT.Hamming; var fft = new FFT(windowWidth, wf); int maxHz = 1000; double hzPerBin = nyquist / (double)binCount; int requiredBinCount = (int)Math.Round(maxHz / hzPerBin); // init list of events List <AcousticEvent> events = new List <AcousticEvent>(); double[] scores = new double[signalLength]; // init of score array int id = 0; foreach (int location in indexList) { //System.LoggedConsole.WriteLine("Location " + location + ", id=" + id); int start = location - binCount; if (start < 0) { continue; } int end = location + binCount; if (end >= signalLength) { continue; } double[] subsampleWav = DataTools.Subarray(bandPassFilteredSignal, start, windowWidth); var spectrum = fft.Invoke(subsampleWav); // convert to power spectrum = DataTools.SquareValues(spectrum); spectrum = DataTools.filterMovingAverageOdd(spectrum, 3); spectrum = DataTools.normalise(spectrum); var subBandSpectrum = DataTools.Subarray(spectrum, 1, requiredBinCount); // ignore DC in bin zero. // now do some tests on spectrum to determine if it is a candidate grunt bool eventFound = false; double[] scoreArray = CalculateScores(subBandSpectrum, windowWidth); double score = scoreArray[0]; if (score > scoreThreshold) { eventFound = true; } if (eventFound) { for (int i = location - binCount; i < location + binCount; i++) { scores[location] = score; } var startTime = TimeSpan.FromSeconds((location - binCount) / (double)sr); string startLabel = startTime.Minutes + "." + startTime.Seconds + "." + startTime.Milliseconds; Image image4 = GraphsAndCharts.DrawWaveAndFft(subsampleWav, sr, startTime, spectrum, maxHz * 2, scoreArray); var path4 = outputDirectory.CreateSubdirectory("subsamples").CombineFile($@"subsample_{location}_{startLabel}.png"); image4.Save(path4.FullName); // have an event, store the data in the AcousticEvent class double duration = 0.2; int minFreq = 50; int maxFreq = 1000; var anEvent = new AcousticEvent(segmentStartOffset, startTime.TotalSeconds, duration, minFreq, maxFreq); anEvent.Name = "grunt"; //anEvent.Name = DataTools.WriteArrayAsCsvLine(subBandSpectrum, "f4"); anEvent.Score = score; events.Add(anEvent); } id++; } // make a spectrogram var config = new SonogramConfig { NoiseReductionType = NoiseReductionType.Standard, NoiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.0, }; var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader); //// when the value is accessed, the indices are calculated //var indices = getSpectralIndexes.Value; //// check if the indices have been calculated - you shouldn't actually need this //if (getSpectralIndexes.IsValueCreated) //{ // // then indices have been calculated before //} var plot = new Plot(this.DisplayName, scores, eventThreshold); return(new RecognizerResults() { Events = events, Hits = null, //ScoreTrack = null, Plots = plot.AsList(), Sonogram = sonogram, }); }
public void TestOfSpectralIndices_Octave() { // create a two-minute artificial recording containing five harmonics. int sampleRate = 64000; double duration = 120; // signal duration in seconds int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Sine); // cut out one minute from 30 - 90 seconds and incorporate into AudioRecording int startSample = sampleRate * 30; // start two minutes into recording int subsegmentSampleCount = sampleRate * 60; // get 60 seconds double[] subsamples = DataTools.Subarray(recording.WavReader.Samples, startSample, subsegmentSampleCount); var wr = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate); var subsegmentRecording = new AudioRecording(wr); //var indexPropertiesConfig = PathHelper.ResolveConfigFile(@"IndexPropertiesConfig.yml"); var configFile = PathHelper.ResolveConfigFile(@"Towsey.Acoustic.yml"); // Create temp directory to store output if (!this.outputDirectory.Exists) { this.outputDirectory.Create(); } // CHANGE CONFIG PARAMETERS HERE IF REQUIRED var indexCalculateConfig = ConfigFile.Deserialize <IndexCalculateConfig>(configFile); indexCalculateConfig.FrequencyScale = FreqScaleType.Octave; var freqScale = new FrequencyScale(indexCalculateConfig.FrequencyScale); indexCalculateConfig.FrameLength = freqScale.WindowSize; var results = IndexCalculate.Analysis( subsegmentRecording, TimeSpan.Zero, indexCalculateConfig.IndexProperties, sampleRate, TimeSpan.Zero, indexCalculateConfig, returnSonogramInfo: true); var spectralIndices = results.SpectralIndexValues; // draw the output image of all spectral indices var outputImagePath1 = Path.Combine(this.outputDirectory.FullName, "SpectralIndices_Octave.png"); var image = SpectralIndexValues.CreateImageOfSpectralIndices(spectralIndices); image.Save(outputImagePath1); // TEST the BGN SPECTRAL INDEX Assert.AreEqual(256, spectralIndices.BGN.Length); var resourcesDir = PathHelper.ResolveAssetPath("Indices"); var expectedSpectrumFile = new FileInfo(resourcesDir + "\\BGN_OctaveScale.bin"); //Binary.Serialize(expectedSpectrumFile, spectralIndices.BGN); var expectedVector = Binary.Deserialize <double[]>(expectedSpectrumFile); CollectionAssert.That.AreEqual(expectedVector, spectralIndices.BGN, AllowedDelta); expectedSpectrumFile = new FileInfo(resourcesDir + "\\CVR_OctaveScale.bin"); //Binary.Serialize(expectedSpectrumFile, spectralIndices.CVR); expectedVector = Binary.Deserialize <double[]>(expectedSpectrumFile); CollectionAssert.That.AreEqual(expectedVector, spectralIndices.CVR, AllowedDelta); }
public void TestCalculateEventStatistics() { int sampleRate = 22050; double duration = 28; int[] harmonics1 = { 500 }; int[] harmonics2 = { 500, 1000, 2000, 4000, 8000 }; var signal1 = DspFilters.GenerateTestSignal(sampleRate, duration, harmonics1, WaveType.Sine); var signal2 = DspFilters.GenerateTestSignal(sampleRate, 4, harmonics2, WaveType.Sine); var signal3 = DspFilters.GenerateTestSignal(sampleRate, duration, harmonics1, WaveType.Sine); var signal = DataTools.ConcatenateVectors(signal1, signal2, signal3); var wr = new WavReader(signal, 1, 16, sampleRate); var recording = new AudioRecording(wr); // this value is fake, but we set it to ensure output values are calculated correctly w.r.t. segment start var segmentOffset = 547.123.Seconds(); var start = TimeSpan.FromSeconds(28) + segmentOffset; var end = TimeSpan.FromSeconds(32) + segmentOffset; double lowFreq = 1500.0; double topFreq = 8500.0; var statsConfig = new EventStatisticsConfiguration() { FrameSize = 512, FrameStep = 512, }; EventStatistics stats = EventStatisticsCalculate.AnalyzeAudioEvent( recording, (start, end).AsRange(), (lowFreq, topFreq).AsRange(), statsConfig, segmentOffset); LoggedConsole.WriteLine($"Stats: Temporal entropy = {stats.TemporalEnergyDistribution:f4}"); LoggedConsole.WriteLine($"Stats: Spectral entropy = {stats.SpectralEnergyDistribution:f4}"); LoggedConsole.WriteLine($"Stats: Spectral centroid= {stats.SpectralCentroid}"); LoggedConsole.WriteLine($"Stats: DominantFrequency= {stats.DominantFrequency}"); Assert.AreEqual(0.0, stats.TemporalEnergyDistribution, 1E-4); Assert.AreEqual(0.6062, stats.SpectralEnergyDistribution, 1E-4); Assert.AreEqual(6687, stats.SpectralCentroid); Assert.AreEqual(8003, stats.DominantFrequency); Assert.AreEqual(1500, stats.LowFrequencyHertz); Assert.AreEqual(8500, stats.HighFrequencyHertz); Assert.AreEqual(28.Seconds() + segmentOffset, stats.EventStartSeconds.Seconds()); Assert.AreEqual(32.Seconds() + segmentOffset, stats.EventEndSeconds.Seconds()); Assert.AreEqual(28.Seconds() + segmentOffset, stats.ResultStartSeconds.Seconds()); /* * // Assume linear scale. * int nyquist = sampleRate / 2; * var freqScale = new FrequencyScale(nyquist: nyquist, frameSize: statsConfig.FrameSize, hertzLinearGridInterval: 1000); * * var sonoConfig = new SonogramConfig * { * WindowSize = statsConfig.FrameSize, * WindowStep = statsConfig.FrameSize, * WindowOverlap = 0.0, * SourceFName = "SineSignal3", * NoiseReductionType = NoiseReductionType.Standard, * NoiseReductionParameter = 0.12, * }; * var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); * var image = sonogram.GetImage(); * string title = $"Spectrogram of Harmonics: SR={sampleRate} Window={freqScale.WindowSize}"; * image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); * string path = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\SineSignal3.png"; * image.Save(path); * * // get spectrum from row 1300 * var normalisedIndex = DataTools.normalise(MatrixTools.GetRow(sonogram.Data, 1300)); * var image2 = GraphsAndCharts.DrawGraph("SPECTRUM", normalisedIndex, 100); * string path2 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\Spectrum3.png"; * image2.Save(path2); */ }
/// <summary> /// THE KEY ANALYSIS METHOD. /// </summary> public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis( AudioRecording recording, SonogramConfig sonoConfig, LitoriaBicolorConfig lbConfig, bool drawDebugImage, TimeSpan segmentStartOffset) { double decibelThreshold = lbConfig.DecibelThreshold; //dB double intensityThreshold = lbConfig.IntensityThreshold; //double eventThreshold = lbConfig.EventThreshold; //in 0-1 if (recording == null) { LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible."); return(null); } //i: MAKE SONOGRAM //TimeSpan tsRecordingtDuration = recording.Duration(); int sr = recording.SampleRate; double freqBinWidth = sr / (double)sonoConfig.WindowSize; double framesPerSecond = freqBinWidth; // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period double dctDuration = 3 * lbConfig.MaxPeriod; // duration of DCT in frames int dctLength = (int)Math.Round(framesPerSecond * dctDuration); // set up the cosine coefficients double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1; int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1; int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1; int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1; BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); int rowCount = sonogram.Data.GetLength(0); int colCount = sonogram.Data.GetLength(1); double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin); double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin); //lowerArray = DataTools.filterMovingAverage(lowerArray, 3); //upperArray = DataTools.filterMovingAverage(upperArray, 3); double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray); double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0); // Could smooth here rather than above. Above seemed slightly better? amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7); differenceScores = DataTools.filterMovingAverage(differenceScores, 7); //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS var predictedEvents = AcousticEvent.ConvertScoreArray2Events( amplitudeScores, lbConfig.LowerBandMinHz, lbConfig.UpperBandMaxHz, sonogram.FramesPerSecond, freqBinWidth, decibelThreshold, lbConfig.MinDuration, lbConfig.MaxDuration, segmentStartOffset); for (int i = 0; i < differenceScores.Length; i++) { if (differenceScores[i] < 1.0) { differenceScores[i] = 0.0; } } // init the score array double[] scores = new double[rowCount]; //iii: CONVERT SCORES TO ACOUSTIC EVENTS // var hits = new double[rowCount, colCount]; double[,] hits = null; // init confirmed events var confirmedEvents = new List <AcousticEvent>(); // add names into the returned events foreach (var ae in predictedEvents) { //rowtop, rowWidth int eventStart = ae.Oblong.RowTop; int eventWidth = ae.Oblong.RowWidth; int step = 2; double maximumIntensity = 0.0; // scan the event to get oscillation period and intensity for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step) { // Look for oscillations in the difference array double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength); Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity); bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod; //Console.WriteLine($"step={i} period={period:f4}"); if (!periodWithinBounds) { continue; } // lay down score for sample length for (int j = 0; j < dctLength; j++) { if (scores[i + j] < intensity) { scores[i + j] = intensity; } } if (maximumIntensity < intensity) { maximumIntensity = intensity; } } // add abbreviatedSpeciesName into event if (maximumIntensity >= intensityThreshold) { ae.Name = "L.b"; ae.Score_MaxInEvent = maximumIntensity; confirmedEvents.Add(ae); } } //###################################################################### // calculate the cosine similarity scores var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold); //DEBUG IMAGE this recognizer only. MUST set false for deployment. Image debugImage = null; if (drawDebugImage) { // display a variety of debug score arrays //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold); //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold); //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold); //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold); //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold); //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold); DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold); var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold); DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold); var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold); var debugPlots = new List <Plot> { scorePlot, sumDiffPlot, differencePlot }; // other debug plots //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot }; debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits); } // return new sonogram because it makes for more easy interpretation of the image var returnSonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = 512, WindowOverlap = 0, // the default window is HAMMING //WindowFunction = WindowFunctions.HANNING.ToString(), //WindowFunction = WindowFunctions.NONE.ToString(), // if do not use noise reduction can get a more sensitive recogniser. //NoiseReductionType = NoiseReductionType.NONE, NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"), }; BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader); return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage)); } //Analysis()
/// <summary> /// Do your analysis. This method is called once per segment (typically one-minute segments). /// </summary> public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth) { string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>"; string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>"; const int frameSize = 256; const double windowOverlap = 0.0; double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0; int minHz = configuration.GetInt(AnalysisKeys.MinHz); int maxHz = configuration.GetInt(AnalysisKeys.MaxHz); // ignore oscillations below this threshold freq int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq); // ignore oscillations above this threshold freq int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq); // duration of DCT in seconds //double dctDuration = (double)configuration[AnalysisKeys.DctDuration]; // minimum acceptable value of a DCT coefficient double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold); // min duration of event in seconds double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration); // max duration of event in seconds double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration); // min score for an acceptable event double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold); // min score for an acceptable event double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold); if (recording.WavReader.SampleRate != 22050) { throw new InvalidOperationException("Requires a 22050Hz file"); } // i: MAKE SONOGRAM var sonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = frameSize, WindowOverlap = windowOverlap, NoiseReductionType = NoiseReductionType.Standard, NoiseReductionParameter = noiseReductionParameter, }; var recordingDuration = recording.Duration; int sr = recording.SampleRate; double freqBinWidth = sr / (double)sonoConfig.WindowSize; int minBin = (int)Math.Round(minHz / freqBinWidth) + 1; int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1; // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period double framesPerSecond = freqBinWidth; double minPeriod = 1 / (double)maxOscilFreq; double maxPeriod = 1 / (double)minOscilFreq; double dctDuration = 5 * maxPeriod; // duration of DCT in frames int dctLength = (int)Math.Round(framesPerSecond * dctDuration); // set up the cosine coefficients double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); int rowCount = sonogram.Data.GetLength(0); double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin); // remove baseline from amplitude array var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7); // remove hi freq content from amplitude array var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11); var dctScores = new double[highPassFilteredSignal.Length]; const int step = 2; for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step) { if (highPassFilteredSignal[i] < decibelThreshold) { continue; } double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength); // Look for oscillations in the highPassFilteredSignal Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity); bool periodWithinBounds = period > minPeriod && period < maxPeriod; if (!periodWithinBounds) { continue; } if (intensity < dctThreshold) { continue; } //lay down score for sample length for (int j = 0; j < dctLength; j++) { if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold) { dctScores[i + j] = intensity; } } } //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS var acousticEvents = AcousticEvent.ConvertScoreArray2Events( dctScores, minHz, maxHz, sonogram.FramesPerSecond, freqBinWidth, eventThreshold, minDuration, maxDuration, segmentStartOffset); // ###################################################################### acousticEvents.ForEach(ae => { ae.SpeciesName = speciesName; ae.SegmentDurationSeconds = recordingDuration.TotalSeconds; ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds; ae.Name = abbreviatedSpeciesName; }); var plot = new Plot(this.DisplayName, dctScores, eventThreshold); var plots = new List <Plot> { plot }; // DEBUG IMAGE this recognizer only. MUST set false for deployment. bool displayDebugImage = MainEntry.InDEBUG; if (displayDebugImage) { // display a variety of debug score arrays DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold); var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold); DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold); var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold); DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold); var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold); var debugPlots = new List <Plot> { ampltdPlot, lowPassPlot, demeanedPlot, plot }; Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null); var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram")); debugImage.Save(debugPath.FullName); } return(new RecognizerResults() { Sonogram = sonogram, Hits = null, Plots = plots, Events = acousticEvents, }); }
public void TestAnalyzeSr22050Recording() { int sampleRate = 22050; double duration = 420; // signal duration in seconds = 7 minutes int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); var recordingPath = this.outputDirectory.CombineFile("TemporaryRecording1.wav"); WavWriter.WriteWavFileViaFfmpeg(recordingPath, recording.WavReader); // draw the signal as spectrogram just for debugging purposes /* * var fst = FreqScaleType.Linear; * var freqScale = new FrequencyScale(fst); * var sonoConfig = new SonogramConfig * { * WindowSize = 512, * WindowOverlap = 0.0, * SourceFName = recording.BaseName, * NoiseReductionType = NoiseReductionType.Standard, * NoiseReductionParameter = 2.0, * }; * var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); * var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM", freqScale.GridLineLocations); * var outputImagePath = this.outputDirectory.CombineFile("Signal1_LinearFreqScale.png"); * image.Save(outputImagePath.FullName); */ var configPath = PathHelper.ResolveConfigFile("Towsey.Acoustic.yml"); var arguments = new AnalyseLongRecording.Arguments { Source = recordingPath, Config = configPath.FullName, Output = this.outputDirectory, MixDownToMono = true, Parallel = !Debugger.IsAttached, }; AnalyseLongRecording.Execute(arguments); var resultsDirectory = this.outputDirectory.Combine("Towsey.Acoustic"); var listOfFiles = resultsDirectory.EnumerateFiles().ToArray(); Assert.AreEqual(38, listOfFiles.Length); var csvCount = listOfFiles.Count(f => f.Name.EndsWith(".csv")); Assert.AreEqual(15, csvCount); var jsonCount = listOfFiles.Count(f => f.Name.EndsWith(".json")); Assert.AreEqual(2, jsonCount); var pngCount = listOfFiles.Count(f => f.Name.EndsWith(".png")); Assert.AreEqual(21, pngCount); var twoMapsImagePath = resultsDirectory.CombineFile("TemporaryRecording1__2Maps.png"); var twoMapsImage = Image.Load <Rgb24>(twoMapsImagePath.FullName); // image is 7 * 632 Assert.AreEqual(7, twoMapsImage.Width); Assert.AreEqual(632, twoMapsImage.Height); var bgnFile = resultsDirectory.CombineFile("TemporaryRecording1__Towsey.Acoustic.BGN.csv"); double[,] actualBgn = Csv.ReadMatrixFromCsv <double>(bgnFile, TwoDimensionalArray.None); var expectedSpectrumFile = PathHelper.ResolveAsset("LongDuration", "BgnMatrix.LinearScale.csv"); // uncomment the following line when first produce the array // bgnFile.CopyTo(expectedSpectrumFile.FullName); // compare actual BGN file with expected file. var expectedBgn = Csv.ReadMatrixFromCsv <double>(expectedSpectrumFile, TwoDimensionalArray.None); CollectionAssert.That.AreEqual(expectedBgn, actualBgn, 0.000_000_001); var array = MatrixTools.GetRow(actualBgn, 0); Assert.AreEqual(7, expectedBgn.RowLength()); Assert.AreEqual(256, array.Length); // draw array just to check peaks are in correct places - just for debugging purposes var ldsBgnSpectrumFile = this.outputDirectory.CombineFile("Spectrum1.png"); GraphsAndCharts.DrawGraph(array, "LD BGN SPECTRUM Linear", ldsBgnSpectrumFile); var generationData = Json.Deserialize <IndexGenerationData>(IndexGenerationData.FindFile(resultsDirectory)); Assert.AreEqual("TemporaryRecording1", generationData.RecordingBasename); }
public void TestAnalyzeSr64000Recording() { int sampleRate = 64000; double duration = 420; // signal duration in seconds = 7 minutes int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); string recordingName = "TemporaryRecording2"; var recordingPath = this.outputDirectory.CombineFile(recordingName + ".wav"); WavWriter.WriteWavFileViaFfmpeg(recordingPath, recording.WavReader); var fst = FreqScaleType.Linear125Octaves7Tones28Nyquist32000; var freqScale = new FrequencyScale(fst); /* * // draw the signal as spectrogram just for debugging purposes * // but can only draw a two minute spectrogram when sr=64000 - change duration above. * duration = 120; // if drawing sonogram, then set signal duration = 2 minutes * var sonogram = OctaveFreqScale.ConvertRecordingToOctaveScaleSonogram(recording, fst); * var sonogramImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM", freqScale.GridLineLocations); * var outputImagePath = this.outputDirectory.CombineFile("SignalSpectrogram_OctaveFreqScale.png"); * sonogramImage.Save(outputImagePath.FullName); */ // Now need to rewrite the config file with new parameter settings var configPath = PathHelper.ResolveConfigFile("Towsey.Acoustic.yml"); // Convert the Config config to IndexCalculateConfig class and merge in the unnecesary parameters. //Config configuration = Yaml.Deserialise(configPath); //IndexCalculateConfig config = IndexCalculateConfig.GetConfig(configuration, false); // because of difficulties in dealing with Config config files, just edit the text file!!!!! var configLines = File.ReadAllLines(configPath.FullName); configLines[configLines.IndexOf(x => x.StartsWith("IndexCalculationDuration: "))] = "IndexCalculationDuration: 15.0"; //configLines[configLines.IndexOf(x => x.StartsWith("BgNoiseBuffer: "))] = "BgNoiseBuffer: 5.0"; configLines[configLines.IndexOf(x => x.StartsWith("FrequencyScale: Linear"))] = "FrequencyScale: " + fst; // the is the only octave scale currently functioning for IndexCalculate class configLines[configLines.IndexOf(x => x.StartsWith("FrameLength"))] = $"FrameLength: {freqScale.WindowSize}"; configLines[configLines.IndexOf(x => x.StartsWith("ResampleRate: "))] = "ResampleRate: 64000"; // write the edited Config file to temporary output directory var newConfigPath = this.outputDirectory.CombineFile("Towsey.Acoustic.yml"); File.WriteAllLines(newConfigPath.FullName, configLines); PathHelper.ResolveConfigFile("IndexPropertiesConfig.yml").CopyTo(this.outputDirectory.CombineFile("IndexPropertiesConfig.yml").FullName); var arguments = new AnalyseLongRecording.Arguments { Source = recordingPath, Config = newConfigPath.FullName, Output = this.outputDirectory, MixDownToMono = true, Parallel = !Debugger.IsAttached, }; AnalyseLongRecording.Execute(arguments); var resultsDirectory = this.outputDirectory.Combine("Towsey.Acoustic"); var listOfFiles = resultsDirectory.EnumerateFiles().ToArray(); Assert.AreEqual(19, listOfFiles.Length); var csvCount = listOfFiles.Count(f => f.Name.EndsWith(".csv")); Assert.AreEqual(15, csvCount); var jsonCount = listOfFiles.Count(f => f.Name.EndsWith(".json")); Assert.AreEqual(2, jsonCount); var pngCount = listOfFiles.Count(f => f.Name.EndsWith(".png")); Assert.AreEqual(2, pngCount); var bgnFile = resultsDirectory.CombineFile(recordingName + "__Towsey.Acoustic.BGN.csv"); double[,] actualBgn = Csv.ReadMatrixFromCsv <double>(bgnFile, TwoDimensionalArray.None); var expectedSpectrumFile = PathHelper.ResolveAsset("LongDuration", "BgnMatrix.OctaveScale.csv"); // uncomment the following line when first produce the array // bgnFile.CopyTo(expectedSpectrumFile.FullName); // compare actual BGN file with expected file. var expectedBgn = Csv.ReadMatrixFromCsv <double>(expectedSpectrumFile, TwoDimensionalArray.None); CollectionAssert.That.AreEqual(expectedBgn, actualBgn, 0.000_000_001); var array = MatrixTools.GetRow(actualBgn, 0); Assert.AreEqual(28, actualBgn.RowLength()); Assert.AreEqual(256, array.Length); // draw array just to check peaks are in correct places - just for debugging purposes var ldsBgnSpectrumFile = this.outputDirectory.CombineFile("Spectrum2.png"); GraphsAndCharts.DrawGraph(array, "LD BGN SPECTRUM Octave", ldsBgnSpectrumFile); // ########################################## // SECOND part of test is to create the LD spectrograms because they are not created when IndexCalcDuration < 60 seconds // first read in the index generation data var icdPath = resultsDirectory.CombineFile(recordingName + "__IndexGenerationData.json"); var indexConfigData = Json.Deserialize <IndexGenerationData>(icdPath); var indexPropertiesConfig = PathHelper.ResolveConfigFile("IndexPropertiesConfig.yml"); var ldSpectrogramConfigFile = PathHelper.ResolveConfigFile("SpectrogramFalseColourConfig.yml"); var ldSpectrogramConfig = LdSpectrogramConfig.ReadYamlToConfig(ldSpectrogramConfigFile); ldSpectrogramConfig.FreqScale = fst.ToString(); // finally read in the dictionary of spectra string analysisType = "Towsey.Acoustic"; var keys = LDSpectrogramRGB.GetArrayOfAvailableKeys(); var dictionaryOfSpectra = IndexMatrices.ReadSpectralIndices(resultsDirectory, recordingName, analysisType, keys); LDSpectrogramRGB.DrawSpectrogramsFromSpectralIndices( inputDirectory: resultsDirectory, outputDirectory: resultsDirectory, ldSpectrogramConfig: ldSpectrogramConfig, indexPropertiesConfigPath: indexPropertiesConfig, indexGenerationData: indexConfigData, basename: recordingName, analysisType: analysisType, indexSpectrograms: dictionaryOfSpectra); // test number of images - should now be 23 listOfFiles = resultsDirectory.EnumerateFiles().ToArray(); pngCount = listOfFiles.Count(f => f.Name.EndsWith(".png")); Assert.AreEqual(22, pngCount); var twoMapsImagePath = resultsDirectory.CombineFile(recordingName + "__2Maps.png"); var twoMapsImage = Image.Load <Rgb24>(twoMapsImagePath.FullName); // image is (7*4) * 652 Assert.AreEqual(28, twoMapsImage.Width); Assert.AreEqual(652, twoMapsImage.Height); }