} //Analysis()

/// <summary>
/// Detects harmonic (formant-like) structure in a recording by cross-correlating the rows
/// of a band-limited amplitude spectrogram, then converting the periodicity scores to acoustic events.
/// </summary>
/// <param name="recording">The audio recording to be scanned.</param>
/// <param name="intensityThreshold">Minimum periodicity intensity for a frame to register a hit / an event.</param>
/// <param name="minHz">Bottom of the frequency band that is scanned; the band spans 32 bins above this.</param>
/// <param name="minFormantgap">Minimum acceptable gap (Hz) between formants/harmonics.</param>
/// <param name="maxFormantgap">Maximum acceptable gap (Hz) between formants/harmonics.</param>
/// <param name="minDuration">Minimum event duration in seconds.</param>
/// <param name="windowSize">FFT frame size in samples.</param>
/// <param name="windowOverlap">Fractional overlap between consecutive frames (0..1).</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>Tuple of (sonogram, hits matrix, score array, predicted events).
/// NOTE(review): the hits matrix is set to null before return (see below), so Item2 is always null — confirm intended.</returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> DetectHarmonics(
    AudioRecording recording,
    double intensityThreshold,
    int minHz,
    int minFormantgap,
    int maxFormantgap,
    double minDuration,
    int windowSize,
    double windowOverlap,
    TimeSpan segmentStartOffset)
{
    //i: MAKE SONOGRAM
    int numberOfBins = 32; // number of freq bins scanned above minHz; the xcorr-FFT technique needs a power of 2
    double binWidth = recording.SampleRate / (double)windowSize;
    int sr = recording.SampleRate;
    double frameDuration = windowSize / (double)sr; // Duration of full frame or window in seconds
    double frameOffset = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
    double framesPerSecond = 1 / frameOffset;

    //double framesPerSecond = sr / (double)windowSize;
    //int frameOffset = (int)(windowSize * (1 - overlap));
    //int frameCount = (length - windowSize + frameOffset) / frameOffset;

    // epsilon = smallest representable amplitude step for this bit depth; guards log of zero downstream
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples, sr, epsilon, windowSize, windowOverlap);
    double[] avAbsolute = results2.Average; //average absolute value over the minute recording
    //double[] envelope = results2.Item2;
    double[,] matrix = results2.AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    double windowPower = results2.WindowPower;

    //window   sr     frameDuration  frames/sec  hz/bin  64frameDuration  hz/64bins  hz/128bins
    // 1024    22050  46.4ms         21.5        21.5    2944ms           1376hz     2752hz
    // 1024    17640  58.0ms         17.2        17.2    3715ms           1100hz     2200hz
    // 2048    17640  116.1ms        8.6         8.6     7430ms           551hz      1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int minBin = (int)Math.Round(minHz / binWidth);
    int maxHz = (int)Math.Round(minHz + (numberOfBins * binWidth));

    int rowCount = matrix.GetLength(0);
    int colCount = matrix.GetLength(1);
    int maxbin = minBin + numberOfBins;

    // extract only the band of interest; skip column 0 (DC) by starting at minBin + 1
    double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1; //an array of periodicity scores
    double[] periodicity = results.Item2;

    //transfer periodicity info to a hits matrix.
    //intensity = DataTools.filterMovingAverage(intensity, 3);
    double[] scoreArray = new double[intensity.Length];
    var hits = new double[rowCount, colCount];
    for (int r = 0; r < rowCount; r++)
    {
        // normalise the period to the scanned band (0..0.5) for display in the hits matrix
        double relativePeriod = periodicity[r] / numberOfBins / 2;
        if (intensity[r] > intensityThreshold)
        {
            for (int c = minBin; c < maxbin; c++)
            {
                hits[r, c] = relativePeriod;
            }
        }

        // accept the frame only when the formant gap (in Hz) is inside the requested range
        double herzPeriod = periodicity[r] * binWidth;
        if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
        {
            scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
        }
    }

    scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxDuration = 100000.0; //abitrary long number - do not want to restrict duration of machine noise
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        framesPerSecond,
        binWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // NOTE(review): the computed hits matrix is discarded here, so the returned Item2 is always null.
    // Presumably done to save memory after events were extracted — confirm callers do not expect hits.
    hits = null;

    //set up the sonogram to return. Use the existing amplitude sonogram
    int bitsPerSample = recording.WavReader.BitsPerSample;
    TimeSpan duration = recording.Duration;
    NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
    var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
        recording.BaseName, windowSize, windowOverlap, bitsPerSample, windowPower, sr, duration, nrt, matrix);

    sonogram.DecibelsNormalised = new double[rowCount]; //foreach frame or time step
    for (int i = 0; i < rowCount; i++)
    {
        // convert average absolute amplitude to dB (2*log10 == 20*log10 / 10)
        sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
    }

    sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
    return Tuple.Create(sonogram, hits, scoreArray, predictedEvents);
} //end Execute_HDDetect
/// <summary>
/// Tests the k-means clustering pipeline: samples random spectral patches from a folder of
/// recordings, clusters them per frequency band, draws the cluster centroids, then reconstructs
/// a target spectrogram from the learned clusters and asserts the reconstruction has the
/// same dimensions as the original.
/// </summary>
public void TestKmeansClustering()
{
    var outputDir = this.outputDirectory;
    var recordingsPath = PathHelper.ResolveAssetPath("FeatureLearning");
    var folderPath = Path.Combine(recordingsPath, "random_audio_segments");

    // note: file name typo is preserved because downstream checks may reference it
    var outputImagePath = Path.Combine(outputDir.FullName, "ReconstrcutedSpectrogram.png");

    // check whether there is any file in the folder/subfolders
    if (Directory.GetFiles(folderPath, "*", SearchOption.AllDirectories).Length == 0)
    {
        throw new ArgumentException("The folder of recordings is empty. Test will fail!");
    }

    // get the nyquist value from the first wav file in the folder of recordings
    int nq = new AudioRecording(Directory.GetFiles(folderPath, "*.wav")[0]).Nyquist;
    int nyquist = nq;
    int frameSize = 1024;
    int finalBinCount = 128;
    int hertzInterval = 1000;
    FreqScaleType scaleType = FreqScaleType.Mel;
    var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
    var sonoConfig = new SonogramConfig
    {
        WindowSize = frameSize,

        //WindowOverlap is set based on the fact that each 24 frames is equal to 1 second
        WindowOverlap = 0.1028,
        DoMelScale = scaleType == FreqScaleType.Mel,
        MelBinCount = scaleType == FreqScaleType.Mel ? finalBinCount : frameSize / 2,
        NoiseReductionType = NoiseReductionType.None,
    };

    int numberOfFreqBand = 4;
    int patchWidth = finalBinCount / numberOfFreqBand;
    int patchHeight = 1;
    int numberOfRandomPatches = 20;

    // Define variable number of "randomPatch" lists based on "numberOfFreqBand"
    var randomPatchLists = new Dictionary<string, List<double[,]>>();
    for (int i = 0; i < numberOfFreqBand; i++)
    {
        randomPatchLists.Add($"randomPatch{i}", new List<double[,]>());
    }

    var randomPatches = new List<double[,]>();
    foreach (string filePath in Directory.GetFiles(folderPath, "*.wav"))
    {
        FileInfo fileInfo = filePath.ToFileInfo();

        // process the wav file if it is not empty
        if (fileInfo.Length != 0)
        {
            var recording = new AudioRecording(filePath);
            sonoConfig.SourceFName = recording.BaseName;
            var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // DO RMS NORMALIZATION
            sonogram.Data = SNR.RmsNormalization(sonogram.Data);

            // DO NOISE REDUCTION
            sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);

            // creating matrices from different freq bands of the source spectrogram
            List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(sonogram.Data, numberOfFreqBand);

            // Second: selecting random patches from each freq band matrix and add them to the corresponding patch list
            // (indexing the list directly; the original called ToArray() on every iteration)
            int count = 0;
            while (count < allSubmatrices.Count)
            {
                randomPatchLists[$"randomPatch{count}"].Add(
                    PatchSampling.GetPatches(allSubmatrices[count], patchWidth, patchHeight, numberOfRandomPatches, PatchSampling.SamplingMethod.Random)
                        .ToMatrix());
                count++;
            }
        }
    }

    // convert list of random patches matrices to one matrix per band
    foreach (string key in randomPatchLists.Keys)
    {
        randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
    }

    int numberOfClusters = 32;
    var allClusteringOutput = new List<KMeansClusterCollection>();
    for (int i = 0; i < randomPatches.Count; i++)
    {
        double[,] patchMatrix = randomPatches[i];

        // Do k-means clustering
        var clusteringOutput = KmeansClustering.Clustering(patchMatrix, numberOfClusters);

        // sorting clusters based on size and output it to a csv file
        Dictionary<int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
        int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);

        // Write cluster ID and size to a CSV file
        string pathToClusterSizeCsvFile = Path.Combine(outputDir.FullName, "ClusterSize" + i.ToString() + ".csv");
        Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

        // Draw cluster image directly from clustering output
        List<KeyValuePair<int, double[]>> listCluster = clusteringOutput.ClusterIdCentroid.ToList();
        double[][] centroids = new double[listCluster.Count][];
        for (int j = 0; j < listCluster.Count; j++)
        {
            centroids[j] = listCluster[j].Value;
        }

        allClusteringOutput.Add(clusteringOutput.Clusters);

        var allCentroids = new List<double[,]>();
        for (int k = 0; k < centroids.Length; k++)
        {
            // convert each centroid to a matrix in order of cluster ID
            // OR: in order of cluster size
            double[,] centroid = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);

            // normalize each centroid
            double[,] normalizedCentroid = DataTools.normalise(centroid);

            // add a row of zero to each centroid
            double[,] newCentroid = PatchSampling.AddRow(normalizedCentroid);

            allCentroids.Add(newCentroid);
        }

        // concatenate all centroids
        double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);

        // Draw clusters
        var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
        clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
        var outputClusteringImage = Path.Combine(outputDir.FullName, "ClustersWithGrid" + i.ToString() + ".bmp");
        FrequencyScale.DrawFrequencyLinesOnImage((Image<Rgb24>)clusterImage, freqScale, includeLabels: false);
        clusterImage.Save(outputClusteringImage);
    }

    //+++++++++++++++++++++++++++++++++++++++++++Reconstructing a target spectrogram from sequential patches and the cluster centroids
    var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var recording2 = new AudioRecording(recording2Path);
    var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader);
    var targetSpec = sonogram2.Data;

    // Do RMS normalization
    sonogram2.Data = SNR.RmsNormalization(sonogram2.Data);

    // NOISE REDUCTION
    sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data);

    // extracting sequential patches from the target spectrogram
    List<double[,]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(sonogram2.Data, numberOfFreqBand);
    double[][,] matrices2 = allSubmatrices2.ToArray();
    var allSequentialPatchMatrix = new List<double[,]>();
    for (int i = 0; i < matrices2.GetLength(0); i++)
    {
        int rows = matrices2[i].GetLength(0);
        int columns = matrices2[i].GetLength(1);
        var sequentialPatches = PatchSampling.GetPatches(
            matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
        allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
    }

    // reconstruct each freq band from its patch matrix + cluster centroids
    var convertedSpectrogram = new List<double[,]>();
    int columnPerFreqBand = sonogram2.Data.GetLength(1) / numberOfFreqBand;
    for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
    {
        double[,] reconstructedSpec2 = KmeansClustering.ReconstructSpectrogram(allSequentialPatchMatrix[i], allClusteringOutput[i]);
        convertedSpectrogram.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, columnPerFreqBand));
    }

    sonogram2.Data = PatchSampling.ConcatFreqBandMatrices(convertedSpectrogram);

    // DO DRAW SPECTROGRAM
    var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(
        sonogram2.GetImage(),
        "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(),
        freqScale.GridLineLocations);
    reconstructedSpecImage.Save(outputImagePath);

    // DO UNIT TESTING: reconstruction must preserve the spectrogram dimensions
    Assert.AreEqual(targetSpec.GetLength(0), sonogram2.Data.GetLength(0));
    Assert.AreEqual(targetSpec.GetLength(1), sonogram2.Data.GetLength(1));
}
} // FELTWithBinaryTemplate()

/// <summary>
/// Scans a recording given a dictionary of parameters and a syntactic (SPR) template.
/// Template has a different orientation to others.
/// </summary>
/// <param name="sonogram">The spectrogram to be scanned.</param>
/// <param name="dict">Parameter key/value pairs read from the template's params file.</param>
/// <param name="templateMatrix">The syntactic template as a char matrix; rows are time frames.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>Tuple of (sonogram, matched events, score array).</returns>
public static Tuple<SpectrogramStandard, List<AcousticEvent>, double[]> FELTWithSprTemplate(SpectrogramStandard sonogram, Dictionary<string, string> dict, char[,] templateMatrix, TimeSpan segmentStartOffset)
{
    //i: get parameters from dictionary
    string callName = dict[FeltTemplate_Create.key_CALL_NAME];
    bool doSegmentation = bool.Parse(dict[FeltTemplate_Create.key_DO_SEGMENTATION]);
    double smoothWindow = double.Parse(dict[FeltTemplate_Create.key_SMOOTH_WINDOW]); //before segmentation
    int minHz = int.Parse(dict[FeltTemplate_Create.key_MIN_HZ]);
    int maxHz = int.Parse(dict[FeltTemplate_Create.key_MAX_HZ]);
    double minDuration = double.Parse(dict[FeltTemplate_Create.key_MIN_DURATION]); //min duration of event in seconds
    double dBThreshold = double.Parse(dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD]); // = 9.0; // dB threshold

    // NOTE(review): the configured threshold is unconditionally overridden here — confirm intended.
    dBThreshold = 4.0;
    int binCount = (int)(maxHz / sonogram.FBinWidth) - (int)(minHz / sonogram.FBinWidth) + 1;
    Log.WriteLine("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);

    //ii: TEMPLATE INFO
    double templateDuration = templateMatrix.GetLength(0) / sonogram.FramesPerSecond;
    Log.WriteIfVerbose("Template duration = {0:f3} seconds or {1} frames.", templateDuration, templateMatrix.GetLength(0));
    Log.WriteIfVerbose("Min Duration: " + minDuration + " seconds");

    //iii: DO SEGMENTATION
    double segmentationThreshold = 2.0; // Standard deviations above background noise
    double maxDuration = double.MaxValue; // Do not constrain maximum length of events.

    // (redundant (SpectrogramStandard) cast removed — sonogram is already that type)
    var tuple1 = AcousticEvent.GetSegmentationEvents(sonogram, doSegmentation, segmentStartOffset, minHz, maxHz, smoothWindow, segmentationThreshold, minDuration, maxDuration);
    var segmentEvents = tuple1.Item1;

    //iv: Score sonogram for events matching template
    //#############################################################################################################################################
    var tuple2 = FindMatchingEvents.Execute_Spr_Match(templateMatrix, sonogram, segmentEvents, minHz, maxHz, dBThreshold);

    //var tuple2 = FindMatchingEvents.Execute_StewartGage(target, dynamicRange, (SpectralSonogram)sonogram, segmentEvents, minHz, maxHz, minDuration);
    //var tuple2 = FindMatchingEvents.Execute_SobelEdges(target, dynamicRange, (SpectralSonogram)sonogram, segmentEvents, minHz, maxHz, minDuration);
    //var tuple2 = FindMatchingEvents.Execute_MFCC_XCOR(target, dynamicRange, sonogram, segmentEvents, minHz, maxHz, minDuration);
    var scores = tuple2.Item1;

    //#############################################################################################################################################

    //v: PROCESS SCORE ARRAY
    //scores = DataTools.filterMovingAverage(scores, 3);
    LoggedConsole.WriteLine("Scores: min={0:f4}, max={1:f4}, threshold={2:f2}dB", scores.Min(), scores.Max(), dBThreshold);

    //Set (scores < 0.0) = 0.0;
    for (int i = 0; i < scores.Length; i++)
    {
        if (scores[i] < 0.0)
        {
            scores[i] = 0.0;
        }
    }

    //vi: EXTRACT EVENTS
    List<AcousticEvent> matchEvents = AcousticEvent.ConvertScoreArray2Events(scores, minHz, maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, dBThreshold, minDuration, maxDuration, segmentStartOffset);
    foreach (AcousticEvent ev in matchEvents)
    {
        ev.FileName = sonogram.Configuration.SourceFName;
        ev.Name = sonogram.Configuration.CallName;
    }

    // Edit the events to correct the start time, duration and end of events to match the max score and length of the template.
    AdjustEventLocation(matchEvents, callName, templateDuration, sonogram.Duration.TotalSeconds);

    return Tuple.Create(sonogram, matchEvents, scores);
} // FELTWithSprTemplate()
/// <summary>
/// Scans a recording against every template listed in the config file (one zip path per line),
/// accumulates matching acoustic events across all templates, prunes overlapping events,
/// and draws an annotated sonogram of the results.
/// </summary>
/// <param name="arguments">Source recording, template-list config and output directory; when null, Dev() defaults are used.</param>
public static void Execute(Arguments arguments)
{
    if (arguments == null)
    {
        arguments = Dev();
    }

    string title = "# FIND OTHER ACOUSTIC EVENTS LIKE THIS";
    string date = "# DATE AND TIME: " + DateTime.Now;
    Log.WriteLine(title);
    Log.WriteLine(date);
    Log.Verbosity = 1;
    Log.WriteIfVerbose("# Recording =" + arguments.Source); //the recording to be scanned
    Log.WriteIfVerbose("# Template list =" + arguments.Config); //the path to a file containing the paths to template locations, one template per line
    Log.WriteIfVerbose("# Output folder =" + arguments.Output); //name of output dir

    var allEvents = new List<AcousticEvent>();
    var scoresList = new List<double[]>();
    var thresholdList = new List<double>();

    //i: GET RECORDING
    AudioRecording recording = new AudioRecording(arguments.Source.FullName);

    //if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
    int sr = recording.SampleRate;

    //ii: MAKE SONOGRAM
    Log.WriteLine("Start sonogram.");
    SonogramConfig sonoConfig = new SonogramConfig(); //default values config
    sonoConfig.SourceFName = recording.BaseName;
    sonoConfig.WindowOverlap = FeltFrameOverlap; // set default value
    sonoConfig.DoMelScale = false;
    sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
    AmplitudeSonogram basegram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    SpectrogramStandard sonogram = new SpectrogramStandard(basegram); //spectrogram has dim[N,257]

    Log.WriteLine("Signal: Duration={0}, Sample Rate={1}", sonogram.Duration, sr);
    Log.WriteLine(
        "Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f0}%, Offset={3:f1}ms, Frames/s={4:f1}",
        sonogram.Configuration.WindowSize,
        sonogram.FrameCount,
        sonogram.FrameDuration * 1000,
        sonogram.FrameStep * 1000,
        sonogram.FramesPerSecond,
        FeltFrameOverlap * 100);

    //iii: Get zip paths and the results Tuple
    List<string> zipList = FileTools.ReadTextFile(arguments.Config.FullName);
    Tuple<SpectrogramStandard, List<AcousticEvent>, double[]> results = null; //set up the results Tuple

    foreach (string zipPath in zipList)
    {
        if (zipPath.StartsWith("#"))
        {
            continue; // commented line
        }

        if (zipPath.Length < 2)
        {
            continue; // empty line
        }

        //i: get params file
        // BUG FIX: ZipFile.ExtractToDirectory(sourceArchiveFileName, destinationDirectoryName) —
        // the original call passed the output directory as the archive and the zip as the destination.
        ZipFile.ExtractToDirectory(zipPath, arguments.Output.FullName);
        string zipName = Path.GetFileNameWithoutExtension(zipPath);
        string[] parts = zipName.Split('_');
        string paramsPath = Path.Combine(arguments.Output.FullName, parts[0] + "_" + parts[1] + "_Params.txt");
        string id = parts[0] + "_" + parts[1];
        Log.WriteIfVerbose("################################################### " + id + " ########################################################");

        //ii: READ PARAMETER VALUES FROM INI FILE
        var config = new ConfigDictionary(paramsPath);
        Dictionary<string, string> dict = config.GetTable();

        // override config values for this run
        dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD] = "4.0";
        dict[FeltTemplate_Create.key_MIN_DURATION] = "0.02";

        // dispatch on the template kind encoded in the zip-file suffix
        if (zipName.EndsWith("binaryTemplate"))
        {
            string templatePath = Path.Combine(arguments.Output.FullName, id + "_binary.bmp");
            double[,] templateMatrix = FindMatchingEvents.ReadImage2BinaryMatrixDouble(templatePath);
            results = FELTWithBinaryTemplate(sonogram, dict, templateMatrix, TimeSpan.Zero);
        }
        else if (zipName.EndsWith("trinaryTemplate"))
        {
            string templatePath = Path.Combine(arguments.Output.FullName, id + "_trinary.bmp");
            double[,] templateMatrix = FindMatchingEvents.ReadImage2TrinaryMatrix(templatePath);
            results = FELTWithBinaryTemplate(sonogram, dict, templateMatrix, TimeSpan.Zero);
        }
        else if (zipName.EndsWith("syntacticTemplate"))
        {
            string templatePath = Path.Combine(arguments.Output.FullName, id + "_spr.txt");
            char[,] templateMatrix = FindMatchingEvents.ReadTextFile2CharMatrix(templatePath);
            results = FELTWithSprTemplate(sonogram, dict, templateMatrix, TimeSpan.Zero);
        }
        else
        {
            Log.WriteLine("ERROR! UNKNOWN TEMPLATE: Zip file has unrecognised suffix:" + zipName);
            continue;
        }

        //get results
        sonogram = results.Item1;
        var matchingEvents = results.Item2;
        var scores = results.Item3;

        double matchThreshold = double.Parse(dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD]);
        Log.WriteLine("# Finished detecting events like target: " + id);
        Log.WriteLine("# Matching Event Count = " + matchingEvents.Count);
        Log.WriteLine(" @ threshold = {0:f2}", matchThreshold);

        // accumulate results
        allEvents.AddRange(matchingEvents);
        scoresList.Add(scores);
        thresholdList.Add(matchThreshold);

        //v: write events count to results info file.
        double sigDuration = sonogram.Duration.TotalSeconds;
        string fname = arguments.Source.Name;
        string str = string.Format("{0}\t{1}\t{2}", fname, sigDuration, matchingEvents.Count);
        StringBuilder sb = AcousticEvent.WriteEvents(matchingEvents, str);

        // NOTE(review): writes to a file literally named "opPath" — looks like a placeholder
        // for a real output path; confirm the intended destination.
        FileTools.WriteTextFile("opPath", sb.ToString());
    } // foreach (string zipPath in zipList)

    Log.WriteLine("\n\n\n##########################################################");
    Log.WriteLine("# Finished detecting events");
    Log.WriteLine("# Event Count = " + allEvents.Count);
    foreach (AcousticEvent ae in allEvents)
    {
        Log.WriteLine("# Event name = {0} ############################", ae.Name);
        Log.WriteLine("# Event time = {0:f2} to {1:f2} (frames {2}-{3}).", ae.TimeStart, ae.TimeEnd, ae.Oblong.RowTop, ae.Oblong.RowBottom);
        Log.WriteLine("# Event score= {0:f2}.", ae.Score);
    }

    int percentOverlap = 50;
    allEvents = PruneOverlappingEvents(allEvents, percentOverlap);
    Log.WriteLine("\n##########################################################");
    Log.WriteLine("# Finished pruning events");
    Log.WriteLine("# Event Count = " + allEvents.Count);
    WriteEventNames(allEvents);

    //WriteScoreAverages2Console(scoresList);

    //draw images of sonograms
    int DRAW_SONOGRAMS = 2;
    FileInfo opImagePath = arguments.Output.CombineFile(Path.GetFileNameWithoutExtension(arguments.Source.Name) + "_matchingEvents.png");
    if (DRAW_SONOGRAMS == 2)
    {
        DrawSonogram(sonogram, opImagePath, allEvents, thresholdList, scoresList);
    }
    else if ((DRAW_SONOGRAMS == 1) && (allEvents.Count > 0))
    {
        DrawSonogram(sonogram, opImagePath, allEvents, thresholdList, scoresList);
    }

    Log.WriteLine("# FINISHED passing all templates over recording:- " + arguments.Source.Name);
}
/// <summary>
/// Calculates the following spectrograms as per settings in the Images array in the config file: Towsey.SpectrogramGenerator.yml:
/// Waveform.
/// DecibelSpectrogram.
/// DecibelSpectrogramNoiseReduced.
/// CepstralSpectrogram.
/// DifferenceSpectrogram.
/// AmplitudeSpectrogramLocalContrastNormalization.
/// Experimental.
/// Comment the config.yml file with a hash, those spectrograms that are not required.
/// </summary>
/// <param name="sourceRecording">The name of the original recording.</param>
/// <param name="config">Contains parameter info to make spectrograms.</param>
/// <param name="sourceRecordingName">Name of source recording. Required only for spectrogram labels.</param>
/// <returns>Result containing the decibel spectrogram (when computed) and a composite image of all requested spectrograms, stacked vertically in the order the user requested them.</returns>
public static AudioToSonogramResult GenerateSpectrogramImages(
    FileInfo sourceRecording,
    SpectrogramGeneratorConfig config,
    string sourceRecordingName)
{
    //int signalLength = recordingSegment.WavReader.GetChannel(0).Length;
    var recordingSegment = new AudioRecording(sourceRecording.FullName);
    int sampleRate = recordingSegment.WavReader.SampleRate;
    var result = new AudioToSonogramResult();

    // default to a single decibel spectrogram when the config does not specify any images
    var requestedImageTypes = config.Images ?? new[] { SpectrogramImageType.DecibelSpectrogram };
    var @do = requestedImageTypes.ToHashSet(); // set membership decides which images get generated

    int frameSize = config.GetIntOrNull("FrameLength") ?? 512;
    int frameStep = config.GetIntOrNull("FrameStep") ?? 441;

    // must calculate this because used later on.
    double frameOverlap = (frameSize - frameStep) / (double)frameSize;

    // Default noiseReductionType = Standard
    var bgNoiseThreshold = config.BgNoiseThreshold;

    // threshold for drawing the difference spectrogram
    var differenceThreshold = config.DifferenceThreshold;

    // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

    var sonoConfig = new SonogramConfig()
    {
        epsilon = recordingSegment.Epsilon,
        SampleRate = sampleRate,
        WindowSize = frameSize,
        WindowStep = frameStep,
        WindowOverlap = frameOverlap,
        WindowPower = dspOutput1.WindowPower,
        Duration = recordingSegment.Duration,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = bgNoiseThreshold,
    };

    // image for each requested type is collected here, then composed at the end
    var images = new Dictionary<SpectrogramImageType, Image<Rgb24>>(requestedImageTypes.Length);

    // IMAGE 1) draw the WAVEFORM
    if (@do.Contains(Waveform))
    {
        var minValues = dspOutput1.MinFrameValues;
        var maxValues = dspOutput1.MaxFrameValues;
        int height = config.WaveformHeight;
        var waveformImage = GetWaveformImage(minValues, maxValues, height);

        // add in the title bar and time scales.
        string title = $"WAVEFORM - {sourceRecordingName} (min value={dspOutput1.MinSignalValue:f3}, max value={dspOutput1.MaxSignalValue:f3})";
        var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
            title,
            waveformImage.Width,
            ImageTags[Waveform]);
        var startTime = TimeSpan.Zero;
        var xAxisTicInterval = TimeSpan.FromSeconds(1);
        TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(frameStep / (double)sampleRate);
        var labelInterval = TimeSpan.FromSeconds(5);
        waveformImage = BaseSonogram.FrameSonogram(
            waveformImage,
            titleBar,
            startTime,
            xAxisTicInterval,
            xAxisPixelDuration,
            labelInterval);
        images.Add(Waveform, waveformImage);
    }

    // Draw various decibel spectrograms
    var decibelTypes = new[] { SpectrogramImageType.DecibelSpectrogram, DecibelSpectrogramNoiseReduced, DifferenceSpectrogram, Experimental };
    if (@do.Overlaps(decibelTypes))
    {
        // disable noise removal for first two spectrograms
        // (the configured type is saved here so it can be restored further down)
        var disabledNoiseReductionType = sonoConfig.NoiseReductionType;
        sonoConfig.NoiseReductionType = NoiseReductionType.None;

        //Get the decibel spectrogram
        var decibelSpectrogram = new SpectrogramStandard(sonoConfig, dspOutput1.AmplitudeSpectrogram);
        result.DecibelSpectrogram = decibelSpectrogram;

        // keep a pristine copy of the data: the difference & experimental images need it
        // after decibelSpectrogram.Data has been noise-reduced in place below
        double[,] dbSpectrogramData = (double[,])decibelSpectrogram.Data.Clone();

        // IMAGE 2) Display the DecibelSpectrogram
        if (@do.Contains(SpectrogramImageType.DecibelSpectrogram))
        {
            images.Add(
                SpectrogramImageType.DecibelSpectrogram,
                decibelSpectrogram.GetImageFullyAnnotated(
                    $"DECIBEL SPECTROGRAM ({sourceRecordingName})",
                    ImageTags[SpectrogramImageType.DecibelSpectrogram]));
        }

        if (@do.Overlaps(new[] { DecibelSpectrogramNoiseReduced, Experimental, CepstralSpectrogram }))
        {
            // restore the originally configured noise reduction and subtract background noise in place
            sonoConfig.NoiseReductionType = disabledNoiseReductionType;
            sonoConfig.NoiseReductionParameter = bgNoiseThreshold;
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram.Data);
            decibelSpectrogram.Data = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram.Data, spectralDecibelBgn);
            decibelSpectrogram.Data = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram.Data, nhThreshold: bgNoiseThreshold);

            // IMAGE 3) DecibelSpectrogram - noise reduced
            if (@do.Contains(DecibelSpectrogramNoiseReduced))
            {
                images.Add(
                    DecibelSpectrogramNoiseReduced,
                    decibelSpectrogram.GetImageFullyAnnotated(
                        $"DECIBEL SPECTROGRAM + Lamel noise subtraction. ({sourceRecordingName})",
                        ImageTags[DecibelSpectrogramNoiseReduced]));
            }

            // IMAGE 4) EXPERIMENTAL Spectrogram
            if (@do.Contains(Experimental))
            {
                sonoConfig.NoiseReductionType = disabledNoiseReductionType;
                images.Add(
                    Experimental,
                    GetDecibelSpectrogram_Ridges(
                        dbSpectrogramData,
                        decibelSpectrogram,
                        sourceRecordingName));
            }
        }

        // IMAGE 5) draw difference spectrogram. This is derived from the original decibel spectrogram
        if (@do.Contains(DifferenceSpectrogram))
        {
            //var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
            var differenceImage = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
            differenceImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
                differenceImage,
                sampleRate,
                frameStep,
                $"DECIBEL DIFFERENCE SPECTROGRAM ({sourceRecordingName})",
                ImageTags[DifferenceSpectrogram]);
            images.Add(DifferenceSpectrogram, differenceImage);
        }
    }

    // IMAGE 6) Cepstral Spectrogram
    if (@do.Contains(CepstralSpectrogram))
    {
        images.Add(
            CepstralSpectrogram,
            GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
    }

    // IMAGE 7) AmplitudeSpectrogram_LocalContrastNormalization
    if (@do.Contains(AmplitudeSpectrogramLocalContrastNormalization))
    {
        var neighborhoodSeconds = config.NeighborhoodSeconds;
        var lcnContrastParameter = config.LcnContrastLevel;
        images.Add(
            AmplitudeSpectrogramLocalContrastNormalization,
            GetLcnSpectrogram(
                sonoConfig,
                recordingSegment,
                sourceRecordingName,
                neighborhoodSeconds,
                lcnContrastParameter));
    }

    // now pick and combine images in order user specified
    var sortedImages = requestedImageTypes.Select(x => images[x]);

    // COMBINE THE SPECTROGRAM IMAGES
    result.CompositeImage = ImageTools.CombineImagesVertically(sortedImages.ToArray());
    return (result);
}
/// <summary>
/// Detects oscillations in a given freq bin.
/// there are several important parameters for tuning.
/// a) DCTLength: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity.
///     Do not want too short because too small a range of oscillations
/// b) DCTindex: Sets lower bound for oscillations of interest. Index refers to array of coefficient returned by DCT.
///     Array has same length as the length of the DCT. Low freq oscillations occur more often by chance. Want to exclude them.
/// c) MinAmplitude: minimum acceptable value of a DCT coefficient if hit is to be accepted.
///     The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
/// </summary>
/// <param name="sonogram">A spectrogram.</param>
/// <param name="minHz">min freq bin of search band.</param>
/// <param name="maxHz">max freq bin of search band.</param>
/// <param name="dctDuration">number of values.</param>
/// <param name="minOscilFreq">minimum oscillation freq.</param>
/// <param name="maxOscilFreq">maximum oscillation freq.</param>
/// <param name="dctThreshold">threshold - do not accept a DCT coefficient if its value is less than this threshold.</param>
/// <returns>A hits matrix (same dimensions as the spectrogram) whose cells hold the mid oscillation
/// frequency where an oscillation was detected; null if maxOscilFreq is too high for the DCT length.</returns>
public static double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz, double dctDuration, int minOscilFreq, int maxOscilFreq, double dctThreshold)
{
    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);

    int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);
    int minIndex = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
    int maxIndex = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi

    int midOscilFreq = minOscilFreq + ((maxOscilFreq - minOscilFreq) / 2);

    //safety check: the requested max oscillation is not resolvable by a DCT of this length
    if (maxIndex > dctLength)
    {
        return (null);
    }

    int rows = sonogram.Data.GetLength(0);
    int cols = sonogram.Data.GetLength(1);
    double[,] hits = new double[rows, cols];
    double[,] matrix = sonogram.Data;

    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients

    //following two lines write matrix of cos values for checking.
    //string txtPath = @"C:\SensorNetworks\Output\cosines.txt";
    //FileTools.WriteMatrix2File_Formatted(cosines, txtPath, "F3");

    //following two lines write bmp image of cos values for checking.
    //string bmpPath = @"C:\SensorNetworks\Output\cosines.png";
    //ImageTools.DrawMatrix(cosines, bmpPath, true);

    //traverse columns - skip DC column
    for (int c = minBin; c <= maxBin; c++)
    {
        var dctArray = new double[dctLength];

        for (int r = 0; r < rows - dctLength; r++)
        {
            // extract array and ready for DCT
            for (int i = 0; i < dctLength; i++)
            {
                dctArray[i] = matrix[r + i, c];
            }

            int lowerDctBound = minIndex / 4; // coefficients below this are zeroed inside DoDct (low-freq bias)
            var dctCoeff = DoDct(dctArray, cosines, lowerDctBound);
            int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

            //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
            if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold)
            {
                for (int i = 0; i < dctLength; i++)
                {
                    hits[r + i, c] = midOscilFreq;
                }
            }

            // note: loop variable is advanced inside the body — with the r++ in the for-header,
            // each iteration effectively moves forward 6 frames
            r += 5; //skip rows
        }

        // note: combined with the c++ in the for-header, this processes every second column
        c++; //do alternate columns
    }

    return (hits);
}
/// <inheritdoc/>
/// <remarks>
/// Dispatches over the configured profiles: each profile is one of the generic
/// component detectors (blob, oscillation, whistle, harmonic) or an AED configuration.
/// Events, plots and the (last) sonogram from every profile are merged into one result.
/// </remarks>
public override RecognizerResults Recognize(
    AudioRecording audioRecording,
    Config genericConfig,
    TimeSpan segmentStartOffset,
    Lazy<IndexCalculateResult[]> getSpectralIndexes,
    DirectoryInfo outputDirectory,
    int? imageWidth)
{
    var configuration = (GenericRecognizerConfig)genericConfig;

    // BUG FIX: previously only a non-null-but-empty profile set produced this friendly error;
    // a null Profiles collection fell through and threw NullReferenceException on the next line.
    if (configuration.Profiles == null || configuration.Profiles.Count == 0)
    {
        throw new ConfigFileException(
            "The generic recognizer needs at least one profile set. 0 were found.");
    }

    int count = configuration.Profiles.Count;
    var message = $"Found {count} analysis profile(s): " + configuration.Profiles.Keys.Join(", ");
    Log.Info(message);

    var allResults = new RecognizerResults()
    {
        Events = new List<AcousticEvent>(),
        Hits = null,
        ScoreTrack = null,
        Plots = new List<Plot>(),
        Sonogram = null,
    };

    // Now process each of the profiles
    foreach (var (profileName, profileConfig) in configuration.Profiles)
    {
        Log.Info("Processing profile: " + profileName);

        List<AcousticEvent> acousticEvents;
        var plots = new List<Plot>();
        SpectrogramStandard sonogram;

        Log.Debug($"Using the {profileName} algorithm...");

        if (profileConfig is CommonParameters parameters)
        {
            if (profileConfig is BlobParameters || profileConfig is OscillationParameters || profileConfig is WhistleParameters || profileConfig is HarmonicParameters)
            {
                sonogram = new SpectrogramStandard(ParametersToSonogramConfig(parameters), audioRecording.WavReader);

                if (profileConfig is OscillationParameters op)
                {
                    Oscillations2012.Execute(
                        sonogram,
                        op.MinHertz.Value,
                        op.MaxHertz.Value,
                        op.DctDuration,
                        op.MinOscillationFrequency,
                        op.MaxOscillationFrequency,
                        op.DctThreshold,
                        op.EventThreshold,
                        op.MinDuration.Value,
                        op.MaxDuration.Value,
                        out var scores,
                        out acousticEvents,
                        out var hits,
                        segmentStartOffset);

                    var plot = PreparePlot(scores, $"{profileName} (:OscillationScore)", op.EventThreshold);
                    plots.Add(plot);
                }
                else if (profileConfig is BlobParameters bp)
                {
                    //get the array of intensity values minus intensity in side/buffer bands.
                    //i.e. require silence in side-bands. Otherwise might simply be getting part of a broader band acoustic event.
                    var decibelArray = SNR.CalculateFreqBandAvIntensityMinusBufferIntensity(
                        sonogram.Data,
                        bp.MinHertz.Value,
                        bp.MaxHertz.Value,
                        bp.BottomHertzBuffer.Value,
                        bp.TopHertzBuffer.Value,
                        sonogram.NyquistFrequency);

                    // prepare plot of resultant blob decibel array.
                    var plot = PreparePlot(decibelArray, $"{profileName} (Blob:db Intensity)", bp.DecibelThreshold.Value);
                    plots.Add(plot);

                    // iii: CONVERT blob decibel SCORES TO ACOUSTIC EVENTS.
                    // Note: This method does NOT do prior smoothing of the dB array.
                    acousticEvents = AcousticEvent.GetEventsAroundMaxima(
                        decibelArray,
                        segmentStartOffset,
                        bp.MinHertz.Value,
                        bp.MaxHertz.Value,
                        bp.DecibelThreshold.Value,
                        TimeSpan.FromSeconds(bp.MinDuration.Value),
                        TimeSpan.FromSeconds(bp.MaxDuration.Value),
                        sonogram.FramesPerSecond,
                        sonogram.FBinWidth);
                }
                else if (profileConfig is WhistleParameters wp)
                {
                    //get the array of intensity values minus intensity in side/buffer bands.
                    double[] decibelArray;
                    (acousticEvents, decibelArray) = WhistleParameters.GetWhistles(
                        sonogram,
                        wp.MinHertz.Value,
                        wp.MaxHertz.Value,
                        sonogram.NyquistFrequency,
                        wp.DecibelThreshold.Value,
                        wp.MinDuration.Value,
                        wp.MaxDuration.Value,
                        segmentStartOffset);

                    var plot = PreparePlot(decibelArray, $"{profileName} (Whistle:dB Intensity)", wp.DecibelThreshold.Value);
                    plots.Add(plot);
                }
                else if (profileConfig is HarmonicParameters hp)
                {
                    //get the array of intensity values minus intensity in side/buffer bands.
                    double[] scoreArray;
                    (acousticEvents, scoreArray) = HarmonicParameters.GetComponentsWithHarmonics(
                        sonogram,
                        hp.MinHertz.Value,
                        hp.MaxHertz.Value,
                        sonogram.NyquistFrequency,
                        hp.DecibelThreshold.Value,
                        hp.DctThreshold.Value,
                        hp.MinDuration.Value,
                        hp.MaxDuration.Value,
                        hp.MinFormantGap.Value,
                        hp.MaxFormantGap.Value,
                        segmentStartOffset);

                    var plot = PreparePlot(scoreArray, $"{profileName} (Harmonics:dB Intensity)", hp.DecibelThreshold.Value);
                    plots.Add(plot);
                }
                else
                {
                    throw new InvalidOperationException();
                }
            }
            else
            {
                throw new InvalidOperationException();
            }

            //iV add additional info to the acoustic events
            acousticEvents.ForEach(ae =>
            {
                ae.FileName = audioRecording.BaseName;
                ae.SpeciesName = parameters.SpeciesName;
                ae.Name = parameters.ComponentName;
                ae.Profile = profileName;
                ae.SegmentDurationSeconds = audioRecording.Duration.TotalSeconds;
                ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
                ae.SetTimeAndFreqScales(sonogram.FrameStep, sonogram.FrameDuration, sonogram.FBinWidth);
            });
        }
        else if (profileConfig is Aed.AedConfiguration ac)
        {
            var config = new SonogramConfig
            {
                NoiseReductionType = ac.NoiseReductionType,
                NoiseReductionParameter = ac.NoiseReductionParameter,
            };
            sonogram = new SpectrogramStandard(config, audioRecording.WavReader);
            acousticEvents = Aed.CallAed(sonogram, ac, segmentStartOffset, audioRecording.Duration).ToList();
        }
        else
        {
            throw new InvalidOperationException();
        }

        // combine the results i.e. add the events list of call events.
        allResults.Events.AddRange(acousticEvents);
        allResults.Plots.AddRange(plots);

        // effectively keeps only the *last* sonogram produced
        allResults.Sonogram = sonogram;
        Log.Debug($"{profileName} event count = {acousticEvents.Count}");

        // DEBUG PURPOSES COMMENT NEXT LINE
        //SaveDebugSpectrogram(allResults, genericConfig, outputDirectory, "name");
    }

    return allResults;
}
/// <summary>
/// THIS IS THE CORE DETECTION METHOD
/// Detects the human voice.
/// Scans the spectrogram for harmonic stacks (formants) whose inter-harmonic gap lies in the
/// configured range, then discounts any frame whose dominant frequency lies outside 500-1000 Hz.
/// </summary>
/// <param name="fiSegmentOfSourceFile">the one-minute (or similar) audio segment to analyse.</param>
/// <param name="configDict">string dictionary of analysis parameters (MIN_HZ, MIN_FORMANT_GAP etc.).</param>
/// <param name="segmentStartOffset">location of this segment within the whole recording.</param>
/// <returns>tuple of (sonogram, hits matrix, score plot, detected events, segment duration).</returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values
    int frameLength = 1024;
    if (configDict.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"]);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"]);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"]);
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"]); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        //default values config
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    var tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz      2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz      2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz      1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);
    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1;
    double[] periodicity = results.Item2; //an array of periodicity scores
    //intensity = DataTools.filterMovingAverage(intensity, 3);

    //expect humans to have max power >100 and < 1000 Hz. Set these bounds
    int lowerHumanMaxBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 3000 hz
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        // ignore locations with low intensity
        if (intensity[r] < intensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        // zero the spectrum outside the expected human voice band before searching for peaks
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < lowerHumanMaxBound; j++)
        {
            spectrum[j] = 0.0;
        }

        for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
        {
            spectrum[j] = 0.0;
        }

        // take the average position of the two highest peaks as the dominant frequency
        double[] peakvalues = DataTools.GetPeakValues(spectrum);
        int maxIndex1 = DataTools.GetMaxIndex(peakvalues);
        peakvalues[maxIndex1] = 0.0;
        int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
        int avMaxBin = (maxIndex1 + maxIndex2) / 2;

        //int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
        int freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);

        // discount to zero any frame whose dominant frequency is outside 500-1000 Hz
        double discount = 1.0;
        if (freqWithMaxPower > 1000)
        {
            discount = 0.0;
        }
        else if (freqWithMaxPower < 500)
        {
            discount = 0.0;
        }

        //set scoreArray[r] - ignore locations with low intensity
        // NOTE(review): this condition is already guaranteed (except for exact equality) by the
        // `continue` at the top of the loop — it is preserved here unchanged.
        if (intensity[r] > intensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        for (int c = minBin; c < maxbin; c++)
        {
            //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
            hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    //remove isolated speech events - expect humans to talk like politicians
    //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
    Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);

    return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
} //Analysis()
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects the White Herron call by finding amplitude peaks in the search band and matching the
/// neighbourhood of each peak against start/end templates using cosine similarity.
/// </summary>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    // common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // BETTER TO CALCULATE THIS. IGNORE USER!
    // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

    // duration of DCT in seconds
    //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

    // minimum acceptable value of a DCT coefficient
    //double dctThreshold = (double)configuration[AnalysisKeys.DctThreshold];

    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
    double decibelThreshold = configuration.GetDouble("DecibelThreshold");

    //double minPeriod = (double)configuration["MinPeriod"]; //: 0.18
    //double maxPeriod = (double)configuration["MaxPeriod"]; //

    //int maxOscilRate = (int)Math.Ceiling(1 /minPeriod);
    //int minOscilRate = (int)Math.Floor(1 /maxPeriod);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in second
    var maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // min score for an acceptable event
    var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    // this default framesize and overlap is best for the White Hrron of Bhutan.
    const int frameSize = 2048;
    double windowOverlap = 0.0;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    /* #############################################################################################################################################
     * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
     * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz      2752hz
     * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz      2200hz
     * 2048     17640       116.1ms          8.6         8.6    7430ms           551hz      1100hz
     * 2048     22050        92.8ms         21.5        10.7666 1472ms
     */

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // var templates = GetTemplatesForAlgorithm1(14);
    // average the energy in the search band for each frame
    var amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    bool[] peakArray = new bool[rowCount];
    var amplitudeScores = new double[rowCount];
    var hits = new double[rowCount, colCount];

    // template-matching constants, in frames
    const int maxTemplateLength = 20;
    const int templateEndPadding = 7;
    const int templateOffset = 14;
    const int minimumGap = 4;
    const int maximumGap = 100;

    // first find the amplitude peaks (local maxima above the decibel threshold)
    for (int j = 1; j < amplitudeArray.Length - 1; j++)
    {
        if (amplitudeArray[j] < decibelThreshold)
        {
            continue;
        }

        if (amplitudeArray[j] > amplitudeArray[j - 1] && amplitudeArray[j] > amplitudeArray[j + 1])
        {
            peakArray[j] = true;
        }
    }

    // get template for end of Herron call
    var endTemplate = GetEndTemplateForAlgorithm2();

    // now search for peaks that are the correct distance apart.
    // loop bound leaves room for the longest template plus padding at the end of the array
    for (int i = 2; i < amplitudeArray.Length - maxTemplateLength - templateEndPadding; i++)
    {
        if (!peakArray[i])
        {
            continue;
        }

        // calculate distance to next peak
        int distanceToNextPeak = CalculateDistanceToNextPeak(peakArray, i);

        // skip gaps that are too small or too large
        if (distanceToNextPeak < minimumGap || distanceToNextPeak > maximumGap)
        {
            continue;
        }

        // The herron call ends with a rising whip
        // Check end of call using end template
        if (distanceToNextPeak > maxTemplateLength)
        {
            // the end template is centred templateOffset frames before the peak
            int start = i - templateOffset;
            if (start < 0)
            {
                start = 0;
            }

            var endLocality = DataTools.Subarray(amplitudeArray, start, endTemplate.Length);
            double endScore = DataTools.CosineSimilarity(endLocality, endTemplate);

            // spread the end score over the matched locality, keeping the max at each frame
            for (int to = -templateOffset; to < endTemplate.Length - templateOffset; to++)
            {
                if (i + to >= 0 && endScore > amplitudeScores[i + to])
                {
                    amplitudeScores[i + to] = endScore;

                    // hits[i, minBin] = 10;
                }
            }

            // zero the scores following the peak so the end of the call terminates the event
            for (int k = 2; k < maxTemplateLength; k++)
            {
                amplitudeScores[i + k] = 0.0;
            }

            continue;
        }

        // Get the start template which depends on distance to next peak.
        var startTemplate = GetTemplateForAlgorithm2(distanceToNextPeak, templateEndPadding);

        // now calculate similarity of locality with the startTemplate
        var locality = DataTools.Subarray(amplitudeArray, i - 2, startTemplate.Length); // i-2 because first two places should be zero.
        double score = DataTools.CosineSimilarity(locality, startTemplate);
        for (int t = 0; t < startTemplate.Length; t++)
        {
            if (score > amplitudeScores[i + t])
            {
                amplitudeScores[i + t] = score;
                hits[i, minBin] = 10;
            }
        }
    } // loop over peak array

    var smoothedScores = DataTools.filterMovingAverageOdd(amplitudeScores, 3);

    // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        smoothedScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // discard events that are too short and tag the survivors
    var prunedEvents = new List<AcousticEvent>();
    foreach (var ae in predictedEvents)
    {
        if (ae.EventDurationSeconds < minDuration)
        {
            continue;
        }

        // add additional info
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // do a recognizer test.
    //CompareArrayWithBenchmark(scores, new FileInfo(recording.FilePath));
    //CompareArrayWithBenchmark(prunedEvents, new FileInfo(recording.FilePath));

    var plot = new Plot(this.DisplayName, amplitudeScores, eventThreshold);
    return(new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = prunedEvents,
    });
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects the target call as an amplitude oscillation within the configured frequency band.
/// </summary>
/// <param name="recording">the audio segment to analyse; must be a 22050 Hz file.</param>
/// <param name="configuration">config providing the band, DCT and event parameters.</param>
/// <param name="segmentStartOffset">location of this segment within the whole recording.</param>
/// <param name="getSpectralIndexes">not used by this recognizer.</param>
/// <param name="outputDirectory">not used by this recognizer.</param>
/// <param name="imageWidth">not used by this recognizer.</param>
/// <returns>recognizer results containing the sonogram, hits, score plot and events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    // WARNING (from original author): TODO - this method simply duplicates the CANETOAD analyser!

    // labels stamped onto each detected event
    string species = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string speciesAbbreviation = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // search band in Hertz
    int bandMinHz = configuration.GetInt(AnalysisKeys.MinHz);
    int bandMaxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // NOTE: frame overlap is calculated below rather than read from config.

    // DCT parameters: window duration (seconds), minimum acceptable coefficient value,
    // and the oscillation rate band of interest (Hz)
    double dctSpanSeconds = configuration.GetDouble(AnalysisKeys.DctDuration);
    double dctCoeffThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);
    int oscillationRateLow = configuration.GetInt(AnalysisKeys.MinOscilFreq);
    int oscillationRateHigh = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // event duration limits (seconds) and minimum acceptable event score
    double minEventSeconds = configuration.GetDouble(AnalysisKeys.MinDuration);
    double maxEventSeconds = configuration.GetDouble(AnalysisKeys.MaxDuration);
    double scoreThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    // The default was 512 for Canetoad; a framesize of 128 seems to work for Littoria fallax.
    int window = configuration.GetInt(AnalysisKeys.KeyFrameSize);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // derive the overlap needed so the fastest oscillation rate is still resolvable
    double overlap = Oscillations2012.CalculateRequiredFrameOverlap(
        recording.SampleRate,
        window,
        oscillationRateHigh);

    //overlap = 0.75; // previous default

    // i: MAKE SONOGRAM (no noise reduction)
    var spectrogramConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = window,
        WindowOverlap = overlap,
        NoiseReductionType = NoiseReductionType.None,
    };

    TimeSpan duration = recording.Duration;

    BaseSonogram sonogram = new SpectrogramStandard(spectrogramConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    Oscillations2012.Execute(
        (SpectrogramStandard)sonogram,
        bandMinHz,
        bandMaxHz,
        dctSpanSeconds,
        oscillationRateLow,
        oscillationRateHigh,
        dctCoeffThreshold,
        scoreThreshold,
        minEventSeconds,
        maxEventSeconds,
        out var scores,
        out var acousticEvents,
        out var hits,
        segmentStartOffset);

    // stamp common metadata onto every detected event
    foreach (var ae in acousticEvents)
    {
        ae.SpeciesName = species;
        ae.SegmentDurationSeconds = duration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = speciesAbbreviation;
    }

    var scorePlot = new Plot(this.DisplayName, scores, scoreThreshold);

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = scorePlot.AsList(),
        Events = acousticEvents,
    };
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects L.nasuta croaks: a frame scores when its spectral maximum lies in the dominant
/// sub-band and, after that band is zeroed, the secondary maximum lies in the subdominant sub-band.
/// </summary>
/// <param name="recording">the audio segment to analyse.</param>
/// <param name="configuration">config providing the recognizer parameters.</param>
/// <param name="segmentStartOffset">location of this segment within the whole recording.</param>
/// <param name="getSpectralIndexes">not used by this recognizer.</param>
/// <param name="outputDirectory">where the debug spectrogram image is written.</param>
/// <param name="imageWidth">not used by this recognizer.</param>
/// <returns>recognizer results containing the sonogram, hits, plots and events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaNasutaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // BETTER TO SET THESE. IGNORE USER!
    // This framesize is large because the oscillation we wish to detect is due to repeated croaks
    // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
    const int frameSize = 1024;
    const double windowOverlap = 0.5;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // use the default HAMMING window
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.None
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    // Get the recording
    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // Get the algorithm parameters: convert the search band from Hertz to bin indices
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 9.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // get the freq band as set by min and max Herz
    var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
    // Skip over spectra where maximum is not in correct location.
    // Bin indices below are relative to the frogBand submatrix (hence the - MinHz term).
    int buffer = 200;
    var croakScoreArray = new double[rowCount];
    var hzAtTopOfTopBand = recognizerConfig.DominantFreq + buffer;
    var hzAtBotOfTopBand = recognizerConfig.DominantFreq - buffer;
    var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
    var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
    var hzAtTopOfBotBand = recognizerConfig.SubdominantFreq + buffer;
    var hzAtBotOfBotBand = recognizerConfig.SubdominantFreq - buffer;
    var binAtTopOfBotBand = (int)Math.Round((hzAtTopOfBotBand - recognizerConfig.MinHz) / freqBinWidth);
    var binAtBotOfBotBand = (int)Math.Round((hzAtBotOfBotBand - recognizerConfig.MinHz) / freqBinWidth);

    // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
    for (int x = 0; x < rowCount; x++)
    {
        //extract spectrum
        var spectrum = MatrixTools.GetRow(frogBand, x);
        int maxIndex1 = DataTools.GetMaxIndex(spectrum);
        double maxValueInTopSubband = spectrum[maxIndex1];
        if (maxValueInTopSubband < decibelThreshold)
        {
            continue;
        }

        // if max value not in correct sub-band then go to next spectrum
        // BUG FIX: the original test used && (`> top && < bot`) which, since the top bin is
        // always above the bottom bin, could never be true — the out-of-band filter was dead code.
        if (maxIndex1 > binAtTopOfTopBand || maxIndex1 < binAtBotOfTopBand)
        {
            continue;
        }

        // minimise values in top sub-band so can find maximum in bottom sub-band
        for (int y = binAtBotOfTopBand; y < binAtTopOfTopBand; y++)
        {
            spectrum[y] = 0.0;
        }

        int maxIndex2 = DataTools.GetMaxIndex(spectrum);

        // if max value properly placed in top and bottom sub-bands then assign maxValue to croakScore array
        // NOTE(review): the lower bound compares against binAtBotOfTopBand rather than
        // binAtBotOfBotBand (which is otherwise unused) — possibly a typo in the original;
        // preserved unchanged pending confirmation against test recordings.
        if (maxIndex2 < binAtTopOfBotBand && maxIndex2 > binAtBotOfTopBand)
        {
            croakScoreArray[x] = maxValueInTopSubband;
        }
    }

    // Prepare a normalised plot for later display with spectrogram
    DataTools.Normalise(croakScoreArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
    var text1 = $"Croak scores (threshold={decibelThreshold})";
    var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

    // extract potential croak events from the array of croak candidates
    var croakEvents = AcousticEvent.ConvertScoreArray2Events(
        croakScoreArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinCroakDuration,
        recognizerConfig.MaxCroakDuration,
        segmentStartOffset);

    // add necessary info into the candidate events
    double[,] hits = null;
    var prunedEvents = new List<AcousticEvent>();
    foreach (var ae in croakEvents)
    {
        // add additional info
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // NOTE: a previously commented-out pulse-train / oscillation (DCT) post-processing stage was
    // removed here. It was never active because the Karlina recording has no pulse train for
    // L.nasuta; see source history if it needs to be restored.

    // do a recognizer test.
    if (MainEntry.InDEBUG)
    {
        //TestTools.RecognizerScoresTest(scores, new FileInfo(recording.FilePath));
        //AcousticEvent.TestToCompareEvents(prunedEvents, new FileInfo(recording.FilePath));
    }

    var scoresPlot = new Plot(this.DisplayName, croakScoreArray, recognizerConfig.EventThreshold);

    // always emit the debug spectrogram image (deliberate `if (true)` toggle from the original)
    if (true)
    {
        // display a variety of debug score arrays

        // calculate amplitude at location
        double[] amplitudeArray = MatrixTools.SumRows(frogBand);
        DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scoresPlot, croakPlot1, amplPlot };

        // NOTE: This DrawDebugImage() method can be over-written in this class.
        var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
        var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
        debugImage.Save(debugPath);
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = scoresPlot.AsList(),
        Events = prunedEvents,
    };
}
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Detects Lewins Rail calls by cross-correlating the average-decibel profiles of two
/// frequency bands (the upper and lower bands in <paramref name="lrConfig"/>) in
/// one-second steps. A step's cross-correlation peak is kept only if its implied period
/// lies within [lrConfig.MinPeriod, lrConfig.MaxPeriod]; the resulting intensity score
/// array is smoothed and converted to acoustic events.
/// </summary>
/// <param name="recording">recording to analyse; if null, a message is logged and null returned.</param>
/// <param name="sonoConfig">spectrogram configuration; WindowSize fixes the frequency bin width.</param>
/// <param name="lrConfig">recognizer parameters: band bounds, thresholds, duration and period limits.</param>
/// <param name="returnDebugImage">when true, a debug image of the score plots is also returned.</param>
/// <param name="segmentStartOffset">start offset of this segment within the source recording.</param>
/// <returns>Tuple of (sonogram, hits matrix (all zeros), smoothed intensity scores, predicted events, debug image or null).</returns>
private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LewinsRailConfig lrConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    // guard clause: cannot analyse a missing recording
    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return(null);
    }

    int sr = recording.SampleRate;
    int upperBandMinHz = lrConfig.UpperBandMinHz;
    int upperBandMaxHz = lrConfig.UpperBandMaxHz;
    int lowerBandMinHz = lrConfig.LowerBandMinHz;
    int lowerBandMaxHz = lrConfig.LowerBandMaxHz;
    //double decibelThreshold = lrConfig.DecibelThreshold; //dB
    //int windowSize = lrConfig.WindowSize;
    double eventThreshold = lrConfig.EventThreshold; //in 0-1
    double minDuration = lrConfig.MinDuration; // seconds
    double maxDuration = lrConfig.MaxDuration; // seconds
    double minPeriod = lrConfig.MinPeriod; // seconds
    double maxPeriod = lrConfig.MaxPeriod; // seconds

    //double freqBinWidth = sr / (double)windowSize;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    //i: MAKE SONOGRAM
    // NOTE(review): framesPerSecond is set equal to freqBinWidth (= sr / windowSize).
    // That equality only holds when the frame overlap is zero — TODO confirm
    // sonoConfig.WindowOverlap == 0 for this recognizer.
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    // +1 skips the DC column of the spectrogram when converting Hz bounds to bin indices.
    int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0); // number of time frames
    int colCount = sonogram.Data.GetLength(1); // number of frequency bins

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    // collapse each band to a single per-frame average value
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    int step = (int)Math.Round(framesPerSecond); //take one second steps
    int stepCount = rowCount / step;
    int sampleLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
    double[] intensity = new double[rowCount];   // per-frame score: best xcorr peak covering that frame
    double[] periodicity = new double[rowCount]; // per-frame period (seconds) of the last accepted window

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // near the end of the recording Subarray returns a short array; stop there
        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 3;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);
        double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds

        // reject windows whose dominant period is outside the species' expected range
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length
        for (int j = 0; j < sampleLength; j++)
        {
            // keep the maximum score seen at each frame (windows can overlap)
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    //######################################################################
    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerBandMinHz,
        upperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray, segmentStartOffset);

    // hits matrix is returned for interface compatibility but is never written to (all zeros)
    var hits = new double[rowCount, colCount];

    //######################################################################

    var scorePlot = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        double[] normalisedScores;
        double normalisedThreshold;
        DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var intensityPlot = new Plot("Intensity", normalisedScores, normalisedThreshold);
        DataTools.Normalise(periodicity, 10, out normalisedScores, out normalisedThreshold);
        var periodicityPlot = new Plot("Periodicity", normalisedScores, normalisedThreshold);
        var debugPlots = new List <Plot> { scorePlot, intensityPlot, periodicityPlot };
        debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
    }

    return(Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage));
} //Analysis()
/// <summary>
/// Generates a stack of diagnostic spectrogram images for one recording and saves them,
/// individually and as one vertically-combined composite, into <paramref name="opDir"/>.
/// The stack contains: (1) LCN-normalised amplitude spectrogram, (2) false-colour amplitude
/// spectrogram with ridge detection plus a wave-envelope track, (3) standard decibel
/// spectrogram with a segmentation track, (4) noise-reduced decibel spectrogram, and
/// (5) false-colour decibel spectrogram.
/// </summary>
/// <param name="sourceRecording">the audio file to render.</param>
/// <param name="configDict">config values; must contain the recording file-name key used for output naming.</param>
/// <param name="opDir">directory that receives all output .png files.</param>
/// <returns>an <c>AudioToSonogramResult</c> whose SpectrogramFile points at the composite image.</returns>
public static AudioToSonogramResult GenerateSpectrogramImages(FileInfo sourceRecording, Dictionary <string, string> configDict, DirectoryInfo opDir)
{
    // output files are named after the recording, not after sourceRecording.FullName
    string sourceName = configDict[ConfigKeys.Recording.Key_RecordingFileName];
    sourceName = Path.GetFileNameWithoutExtension(sourceName);
    var result = new AudioToSonogramResult();

    // init the image stack
    var list = new List <Image <Rgb24> >();

    // 1) draw amplitude spectrogram
    AudioRecording recordingSegment = new AudioRecording(sourceRecording.FullName);
    var sonoConfig = new SonogramConfig(configDict)
    {
        NoiseReductionType = NoiseReductionType.None,
    }; // default values config

    // disable noise removal for first two spectrograms
    BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);

    // remove the DC bin
    sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.FrameCount - 1, sonogram.Configuration.FreqBinCount);

    //save spectrogram data at this point - prior to noise reduction
    double[,] spectrogramDataBeforeNoiseReduction = sonogram.Data;

    // local contrast normalisation (LCN) parameters
    int lowPercentile = 20;
    double neighbourhoodSeconds = 0.25;
    int neighbourhoodFrames = (int)(sonogram.FramesPerSecond * neighbourhoodSeconds);
    double lcnContrastLevel = 0.25;
    //LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", sonogram.FramesPerSecond);
    //LoggedConsole.WriteLine("LCN: Neighbourhood of {0} seconds = {1} frames", neighbourhoodSeconds, neighbourhoodFrames);
    sonogram.Data = NoiseRemoval_Briggs.NoiseReduction_ShortRecordings_SubtractAndLCN(sonogram.Data, lowPercentile, neighbourhoodFrames, lcnContrastLevel);

    // draw amplitude spectrogram unannotated
    FileInfo outputImage1 = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".amplitd.png"));
    ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), outputImage1.FullName);

    // draw amplitude spectrogram annotated
    var image = sonogram.GetImageFullyAnnotated("AMPLITUDE SPECTROGRAM + Bin LCN (Local Contrast Normalisation)");
    list.Add(image);

    //string path2 = @"C:\SensorNetworks\Output\Sonograms\dataInput2.png";
    //Histogram.DrawDistributionsAndSaveImage(sonogram.Data, path2);

    // 2) A FALSE-COLOUR VERSION OF AMPLITUDE SPECTROGRAM
    double ridgeThreshold = 0.20;
    double[,] matrix = ImageTools.WienerFilter(sonogram.Data, 3);
    byte[,] hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);
    hits = RidgeDetection.JoinDisconnectedRidgesInMatrix(hits, matrix, ridgeThreshold);
    image = SpectrogramTools.CreateFalseColourAmplitudeSpectrogram(spectrogramDataBeforeNoiseReduction, null, hits);
    image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "AMPLITUDE SPECTROGRAM + LCN + ridge detection");
    list.Add(image);

    var envelopeImage = ImageTrack.DrawWaveEnvelopeTrack(recordingSegment, image.Width);
    list.Add(envelopeImage);

    // 3) now draw the standard decibel spectrogram
    sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

    // draw decibel spectrogram unannotated
    FileInfo outputImage2 = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".deciBel.png"));
    ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), outputImage2.FullName);
    image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM");
    list.Add(image);

    var segmentationImage = ImageTrack.DrawSegmentationTrack(
        sonogram,
        EndpointDetectionConfiguration.K1Threshold,
        EndpointDetectionConfiguration.K2Threshold,
        image.Width);
    list.Add(segmentationImage);

    // keep the sonogram data (NOT noise reduced) for later use
    double[,] dbSpectrogramData = (double[, ])sonogram.Data.Clone();

    // 4) now draw the noise reduced decibel spectrogram
    sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
    sonoConfig.NoiseReductionParameter = 3;
    //sonoConfig.NoiseReductionType = NoiseReductionType.SHORT_RECORDING;
    //sonoConfig.NoiseReductionParameter = 50;
    sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

    // draw decibel spectrogram unannotated
    FileInfo outputImage3 = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".noNoise_dB.png"));
    ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), outputImage3.FullName);
    image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM + Lamel noise subtraction");
    list.Add(image);

    // keep the sonogram data for later use
    double[,] nrSpectrogramData = sonogram.Data;

    // 5) A FALSE-COLOUR VERSION OF DECIBEL SPECTROGRAM
    ridgeThreshold = 2.5;
    matrix = ImageTools.WienerFilter(dbSpectrogramData, 3);
    hits = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);
    image = SpectrogramTools.CreateFalseColourDecibelSpectrogram(dbSpectrogramData, nrSpectrogramData, hits);
    image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "DECIBEL SPECTROGRAM - Colour annotated");
    list.Add(image);

    // 6) COMBINE THE SPECTROGRAM IMAGES
    Image compositeImage = ImageTools.CombineImagesVertically(list);
    FileInfo outputImage = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".5spectro.png"));
    compositeImage.Save(outputImage.FullName);
    result.SpectrogramFile = outputImage;

    // 7) Generate the FREQUENCY x OSCILLATIONS Graphs and csv data
    //bool saveData = true;
    //bool saveImage = true;
    //double[] oscillationsSpectrum = Oscillations2014.GenerateOscillationDataAndImages(sourceRecording, configDict, saveData, saveImage);
    return(result);
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Builds a 256-sample, noise-reduced spectrogram, averages the search band into a single
/// per-frame amplitude array, and scans that array with a sliding DCT to find oscillations
/// whose period and intensity satisfy the configured thresholds. Qualifying stretches are
/// scored and converted to acoustic events.
/// Throws <see cref="InvalidOperationException"/> unless the recording sample rate is 22050 Hz.
/// </summary>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    const int frameSize = 256;
    const double windowOverlap = 0.0;
    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // duration of DCT in seconds
    //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // min dB for an acceptable frame amplitude (used to gate the DCT scan)
    double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // +1 skips the DC column when converting Hz bounds to bin indices
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    // NOTE(review): framesPerSecond = freqBinWidth holds only because windowOverlap is 0 here.
    double framesPerSecond = freqBinWidth;
    double minPeriod = 1 / (double)maxOscilFreq;
    double maxPeriod = 1 / (double)minOscilFreq;
    double dctDuration = 5 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    // collapse the search band to one average dB value per time frame
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // remove baseline from amplitude array
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // remove hi freq content from amplitude array
    var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

    var dctScores = new double[highPassFilteredSignal.Length];
    const int step = 2; // slide the DCT window two frames at a time
    for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
    {
        // skip quiet frames: no point running the DCT there
        if (highPassFilteredSignal[i] < decibelThreshold)
        {
            continue;
        }

        double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

        // Look for oscillations in the highPassFilteredSignal
        Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
        bool periodWithinBounds = period > minPeriod && period < maxPeriod;
        if (!periodWithinBounds)
        {
            continue;
        }

        if (intensity < dctThreshold)
        {
            continue;
        }

        //lay down score for sample length; keep the max score per frame and
        //only where the (low-pass) amplitude also exceeds the dB threshold
        for (int j = 0; j < dctLength; j++)
        {
            if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
            {
                dctScores[i + j] = intensity;
            }
        }
    }

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // ######################################################################
    // stamp each event with segment/species metadata
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, dctScores, eventThreshold);
    var plots = new List <Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
        DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);
        DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);
        var debugPlots = new List <Plot> { ampltdPlot, lowPassPlot, demeanedPlot, plot };
        Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    return(new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = null,
        Plots = plots,
        Events = acousticEvents,
    });
}
/// <summary>
/// Calculates the mean intensity in a freq band defined by its min and max freq.
/// This method averages dB log values incorrectly but it is faster than doing many log conversions.
/// This method is used to find acoustic events and is accurate enough for the purpose.
/// For every bin in the band (excluding top and bottom), a per-frame whistle score is
/// computed as the three-bin band average minus the average of the side-band bins
/// (offsets +3..+5 above and, when available, -3..-5 below); scores are smoothed and
/// converted to acoustic events per bin.
/// </summary>
/// <param name="sonogram">decibel spectrogram to search.</param>
/// <param name="minHz">lower bound of the search band.</param>
/// <param name="maxHz">upper bound of the search band.</param>
/// <param name="nyquist">nyquist frequency, used to convert Hz to bin indices.</param>
/// <param name="decibelThreshold">score threshold for events.</param>
/// <param name="minDuration">minimum event duration in seconds.</param>
/// <param name="maxDuration">maximum event duration in seconds.</param>
/// <param name="segmentStartOffset">start offset of this segment within the source recording.</param>
/// <returns>the accumulated events and the per-frame maximum intensity over all scanned bins.</returns>
public static (List <AcousticEvent>, double[]) GetWhistles(
    SpectrogramStandard sonogram,
    int minHz,
    int maxHz,
    int nyquist,
    double decibelThreshold,
    double minDuration,
    double maxDuration,
    TimeSpan segmentStartOffset)
{
    var sonogramData = sonogram.Data;
    int frameCount = sonogramData.GetLength(0);
    int binCount = sonogramData.GetLength(1);
    double binWidth = nyquist / (double)binCount;
    int minBin = (int)Math.Round(minHz / binWidth);
    int maxBin = (int)Math.Round(maxHz / binWidth);
    //int binCountInBand = maxBin - minBin + 1;

    // buffer zone around whistle is four bins wide.
    int N = 4;

    // BUG FIX: the top side-band reads sonogramData[t, bin + 5] with bin up to maxBin - 1.
    // If maxHz lies close to the nyquist, bin + 5 can exceed the last column and throw
    // IndexOutOfRangeException. Clamp maxBin so bin + 5 <= binCount - 1 always holds.
    if (maxBin > binCount - 5)
    {
        maxBin = binCount - 5;
    }

    // list of accumulated acoustic events
    var events = new List <AcousticEvent>();
    var combinedIntensityArray = new double[frameCount];

    // for all frequency bins except top and bottom
    for (int bin = minBin + 1; bin < maxBin; bin++)
    {
        // set up an intensity array for the frequency bin.
        double[] intensity = new double[frameCount];

        if (minBin < N)
        {
            // band sits too low to read the bottom side-band (bin - 5 could underflow),
            // so subtract only the top side-band.
            // for all time frames in this frequency bin
            for (int t = 0; t < frameCount; t++)
            {
                var bandIntensity = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                var topSideBandIntensity = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 3.0;
                intensity[t] = bandIntensity - topSideBandIntensity;
                intensity[t] = Math.Max(0.0, intensity[t]);
            }
        }
        else
        {
            // subtract half-weighted side-bands above AND below the whistle band
            // for all time frames in this frequency bin
            for (int t = 0; t < frameCount; t++)
            {
                var bandIntensity = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                var topSideBandIntensity = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 6.0;
                var bottomSideBandIntensity = (sonogramData[t, bin - 3] + sonogramData[t, bin - 4] + sonogramData[t, bin - 5]) / 6.0;
                intensity[t] = bandIntensity - topSideBandIntensity - bottomSideBandIntensity;
                intensity[t] = Math.Max(0.0, intensity[t]);
            }
        }

        // smooth the decibel array to allow for brief gaps.
        intensity = DataTools.filterMovingAverageOdd(intensity, 7);

        //calculate the Hertz bounds of the acoustic events for these freq bins
        int bottomHzBound = (int)Math.Floor(sonogram.FBinWidth * (bin - 1));
        int topHzBound = (int)Math.Ceiling(sonogram.FBinWidth * (bin + 2));

        //extract the events based on length and threshhold.
        // Note: This method does NOT do prior smoothing of the dB array.
        var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
            intensity,
            bottomHzBound,
            topHzBound,
            sonogram.FramesPerSecond,
            sonogram.FBinWidth,
            decibelThreshold,
            minDuration,
            maxDuration,
            segmentStartOffset);

        // fold this bin's intensity into the combined array (element-wise maximum)
        for (int t = 0; t < frameCount; t++)
        {
            //combinedIntensityArray[t] += intensity[t];
            combinedIntensityArray[t] = Math.Max(intensity[t], combinedIntensityArray[t]);
        }

        // combine events
        events.AddRange(acousticEvents);
    } //end for all freq bins

    // combine adjacent acoustic events
    //events = AcousticEvent.CombineOverlappingEvents(events);
    return (events, combinedIntensityArray);
}
/// <summary>
/// Entry point for the Syntactic Pattern Recognition (SPR) scan. Reads parameters from an
/// ini file, builds a spectrogram of the source recording, then dispatches on the call
/// name ("WHIPBIRD", "CURLEW" or "CURRAWONG") to run marked-line detection and extract
/// acoustic events. Writes an event-count summary to a text file and optionally saves an
/// annotated sonogram image.
/// NOTE(review): if callName matches none of the three branches, predictedEvents stays
/// null and the later predictedEvents.Count dereference throws — confirm intended.
/// </summary>
/// <param name="arguments">source/config/output paths; when null, Dev() defaults are used.</param>
public static void Execute(Arguments arguments)
{
    if (arguments == null)
    {
        arguments = Dev();
    }

    LoggedConsole.WriteLine("DATE AND TIME:" + DateTime.Now);
    LoggedConsole.WriteLine("Syntactic Pattern Recognition\n");
    //StringBuilder sb = new StringBuilder("DATE AND TIME:" + DateTime.Now + "\n");
    //sb.Append("SCAN ALL RECORDINGS IN A DIRECTORY USING HTK-RECOGNISER\n");

    Log.Verbosity = 1;

    FileInfo recordingPath = arguments.Source;
    FileInfo iniPath = arguments.Config;
    DirectoryInfo outputDir = arguments.Output;
    string opFName = "SPR-output.txt";

    // NOTE(review): DirectoryInfo + string concatenation may not insert a path
    // separator — presumably outputDir's string form ends with one; verify, or the
    // output file lands beside outputDir with a fused name. Same applies to
    // imageName below. Consider Path.Combine.
    string opPath = outputDir + opFName;
    Log.WriteIfVerbose("# Output folder =" + outputDir);

    // A: READ PARAMETER VALUES FROM INI FILE
    var config = new ConfigDictionary(iniPath);
    Dictionary <string, string> dict = config.GetTable();
    Dictionary <string, string> .KeyCollection keys = dict.Keys;

    string callName = dict[key_CALL_NAME];
    double frameOverlap = Convert.ToDouble(dict[key_FRAME_OVERLAP]);

    //SPT PARAMETERS
    double intensityThreshold = Convert.ToDouble(dict[key_SPT_INTENSITY_THRESHOLD]);
    int smallLengthThreshold = Convert.ToInt32(dict[key_SPT_SMALL_LENGTH_THRESHOLD]);

    //WHIPBIRD PARAMETERS
    int whistle_MinHz = int.Parse(dict[key_WHISTLE_MIN_HZ]);
    int whistle_MaxHz = int.Parse(dict[key_WHISTLE_MAX_HZ]);
    double optimumWhistleDuration = double.Parse(dict[key_WHISTLE_DURATION]); //optimum duration of whistle in seconds
    // whip parameters are optional in the ini file; default to 0 when absent
    int whip_MinHz = (dict.ContainsKey(key_WHIP_MIN_HZ)) ? int.Parse(dict[key_WHIP_MIN_HZ]) : 0;
    int whip_MaxHz = (dict.ContainsKey(key_WHIP_MAX_HZ)) ? int.Parse(dict[key_WHIP_MAX_HZ]) : 0;
    double whipDuration = (dict.ContainsKey(key_WHIP_DURATION)) ? double.Parse(dict[key_WHIP_DURATION]) : 0.0; //duration of whip in seconds

    //CURLEW PARAMETERS
    double minDuration = (dict.ContainsKey(key_MIN_DURATION)) ? double.Parse(dict[key_MIN_DURATION]) : 0.0; //min duration of call in seconds
    double maxDuration = (dict.ContainsKey(key_MAX_DURATION)) ? double.Parse(dict[key_MAX_DURATION]) : 0.0; //duration of call in seconds

    double eventThreshold = double.Parse(dict[key_EVENT_THRESHOLD]); //min score for an acceptable event
    int DRAW_SONOGRAMS = Convert.ToInt16(dict[key_DRAW_SONOGRAMS]);

    // B: CHECK to see if conversion from .MP3 to .WAV is necessary
    var destinationAudioFile = recordingPath;

    //LOAD RECORDING AND MAKE SONOGRAM
    BaseSonogram sonogram = null;
    using (var recording = new AudioRecording(destinationAudioFile.FullName))
    {
        // if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
        var sonoConfig = new SonogramConfig
        {
            NoiseReductionType = NoiseReductionType.None,
            //NoiseReductionType = NoiseReductionType.STANDARD,
            WindowOverlap = frameOverlap,
        };
        sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    }

    List <AcousticEvent> predictedEvents = null;
    double[,] hits = null;
    double[] scores = null;
    var audioFileName = Path.GetFileNameWithoutExtension(destinationAudioFile.FullName);

    if (callName.Equals("WHIPBIRD"))
    {
        //SPT
        var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);

        //SPR: mark near-horizontal (whistle) and near-vertical (whip) line segments
        Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
        int slope = 0; //degrees of the circle. i.e. 90 = vertical line.
        double sensitivity = 0.7; //lower value = more sensitive
        var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
        slope = 87; //84
        sensitivity = 0.8; //lower value = more sensitive
        var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 4, intensityThreshold + 1, sensitivity);
        Log.WriteLine("SPR finished");
        Log.WriteLine("Extract Whipbird calls - start");

        // convert Hz bounds and durations to spectrogram bin/frame units
        int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
        int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
        int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
        int minBound_Whip = (int)(whip_MinHz / sonogram.FBinWidth);
        int maxBound_Whip = (int)(whip_MaxHz / sonogram.FBinWidth);
        int whipFrames = (int)(sonogram.FramesPerSecond * whipDuration); //86 = frames/sec.
        var result3 = DetectWhipBird(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, minBound_Whip, maxBound_Whip, whipFrames, smallLengthThreshold);
        scores = result3.Item1;
        hits = DataTools.AddMatrices(mHori, mVert);

        predictedEvents = AcousticEvent.ConvertScoreArray2Events(
            scores,
            whip_MinHz,
            whip_MaxHz,
            sonogram.FramesPerSecond,
            sonogram.FBinWidth,
            eventThreshold,
            minDuration,
            maxDuration,
            TimeSpan.Zero);
        foreach (AcousticEvent ev in predictedEvents)
        {
            ev.FileName = audioFileName;
            ev.Name = callName;
        }

        sonogram.Data = result1.Item1;
        Log.WriteLine("Extract Whipbird calls - finished");
    }
    else if (callName.Equals("CURLEW"))
    {
        //SPT
        double backgroundThreshold = 4.0;
        var result1 = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.Standard, backgroundThreshold);
        //var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);
        //var result1 = doNoiseRemoval(sonogram, intensityThreshold, smallLengthThreshold);

        //SPR: mark rising (20 deg) and falling (160 deg) line segments
        Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
        int slope = 20; //degrees of the circle. i.e. 90 = vertical line.
        double sensitivity = 0.8; //lower value = more sensitive
        var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
        slope = 160;
        sensitivity = 0.8; //lower value = more sensitive
        var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
        Log.WriteLine("SPR finished");

        //detect curlew calls
        int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
        int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
        int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration);
        var result3 = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, smallLengthThreshold);

        //process curlew scores - look for curlew characteristic periodicity
        double minPeriod = 1.2;
        double maxPeriod = 1.8;
        int minPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * minPeriod);
        int maxPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * maxPeriod);
        scores = DataTools.filterMovingAverage(result3.Item1, 21);
        scores = DataTools.PeriodicityDetection(scores, minPeriod_frames, maxPeriod_frames);

        //extract events
        predictedEvents = AcousticEvent.ConvertScoreArray2Events(
            scores,
            whistle_MinHz,
            whistle_MaxHz,
            sonogram.FramesPerSecond,
            sonogram.FBinWidth,
            eventThreshold,
            minDuration,
            maxDuration,
            TimeSpan.Zero);
        foreach (AcousticEvent ev in predictedEvents)
        {
            ev.FileName = audioFileName;
            ev.Name = callName;
        }

        hits = DataTools.AddMatrices(mHori, mVert);
        sonogram.Data = result1.Item1;
        Log.WriteLine("Extract Curlew calls - finished");
    }
    else if (callName.Equals("CURRAWONG"))
    {
        //SPT
        var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);

        //SPR: mark steep rising (70 deg) and steep falling (110 deg) line segments
        Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
        int slope = 70; //degrees of the circle. i.e. 90 = vertical line.
        //slope = 210;
        double sensitivity = 0.7; //lower value = more sensitive
        var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
        slope = 110;
        //slope = 340;
        sensitivity = 0.7; //lower value = more sensitive
        var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
        Log.WriteLine("SPR finished");

        int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
        int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
        int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
        var result3 = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames + 10, smallLengthThreshold);
        scores = result3.Item1;
        hits = DataTools.AddMatrices(mHori, mVert);

        predictedEvents = AcousticEvent.ConvertIntensityArray2Events(
            scores,
            TimeSpan.Zero,
            whistle_MinHz,
            whistle_MaxHz,
            sonogram.FramesPerSecond,
            sonogram.FBinWidth,
            eventThreshold,
            0.5,
            maxDuration);
        foreach (AcousticEvent ev in predictedEvents)
        {
            ev.FileName = audioFileName;
            //ev.Name = callName;
        }
    }

    //write event count to results file.
    double sigDuration = sonogram.Duration.TotalSeconds;
    //string fname = Path.GetFileName(recordingPath);
    // NOTE(review): throws NullReferenceException here if callName matched no branch above.
    int count = predictedEvents.Count;
    Log.WriteIfVerbose("Number of Events: " + count);
    string str = string.Format("{0}\t{1}\t{2}", callName, sigDuration, count);
    FileTools.WriteTextFile(opPath, AcousticEvent.WriteEvents(predictedEvents, str).ToString());

    // SAVE IMAGE
    string imageName = outputDir + audioFileName;
    string imagePath = imageName + ".png";

    // avoid overwriting: move the existing image aside using the first free numeric suffix
    if (File.Exists(imagePath))
    {
        int suffix = 1;
        while (File.Exists(imageName + "." + suffix.ToString() + ".png"))
        {
            suffix++;
        }

        //{
        //    suffix = (suffix == string.Empty) ? "1" : (int.Parse(suffix) + 1).ToString();
        //}
        //File.Delete(outputDir + audioFileName + "." + suffix.ToString() + ".png");
        File.Move(imagePath, imageName + "." + suffix.ToString() + ".png");
    }

    //string newPath = imagePath + suffix + ".png";
    // DRAW_SONOGRAMS: 2 = always draw; 1 = draw only when events were found
    if (DRAW_SONOGRAMS == 2)
    {
        DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
    }
    else if ((DRAW_SONOGRAMS == 1) && (predictedEvents.Count > 0))
    {
        DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
    }

    Log.WriteIfVerbose("Image saved to: " + imagePath);
    //string savePath = outputDir + Path.GetFileNameWithoutExtension(recordingPath);
    //string suffix = string.Empty;
    //Image im = sonogram.GetImage(false, false);
    //string newPath = savePath + suffix + ".jpg";
    //im.Save(newPath);

    LoggedConsole.WriteLine("\nFINISHED RECORDING!");
    // NOTE(review): blocks waiting for keyboard input — unsuitable for unattended/batch runs.
    Console.ReadLine();
}
/// <summary>
/// Searches a spectrogram sub-band for stacked harmonics (formants) using the
/// cross-correlation/DCT method, zeroes any frame whose implied formant gap falls
/// outside [minFormantGap, maxFormantGap], smooths the surviving scores and converts
/// them into acoustic events.
/// </summary>
/// <param name="spectrogram">decibel spectrogram to search.</param>
/// <param name="minHz">lower bound of the search band.</param>
/// <param name="maxHz">upper bound of the search band.</param>
/// <param name="nyquist">nyquist frequency, used to convert Hz to bin indices.</param>
/// <param name="decibelThreshold">threshold passed to the harmonic detector.</param>
/// <param name="dctThreshold">minimum acceptable harmonic intensity score; also the event threshold.</param>
/// <param name="minDuration">minimum event duration in seconds.</param>
/// <param name="maxDuration">maximum event duration in seconds.</param>
/// <param name="minFormantGap">minimum permitted formant gap in Hz.</param>
/// <param name="maxFormantGap">maximum permitted formant gap in Hz.</param>
/// <param name="segmentStartOffset">start offset of this segment within the source recording.</param>
/// <returns>the detected events, the per-frame dB array and the (smoothed) harmonic intensity scores.</returns>
public static (List <AcousticEvent>, double[], double[]) GetComponentsWithHarmonics(
    SpectrogramStandard spectrogram,
    int minHz,
    int maxHz,
    int nyquist,
    double decibelThreshold,
    double dctThreshold,
    double minDuration,
    double maxDuration,
    int minFormantGap,
    int maxFormantGap,
    TimeSpan segmentStartOffset)
{
    // Event threshold - Determines FP / FN trade-off for events.
    //double eventThreshold = 0.2;
    var data = spectrogram.Data;
    int timeFrames = data.GetLength(0);
    int totalBins = data.GetLength(1);
    double hertzPerBin = nyquist / (double)totalBins;

    // convert the Hz search limits into spectrogram bin indices
    int bottomBin = (int)Math.Round(minHz / hertzPerBin);
    int topBin = (int)Math.Round(maxHz / hertzPerBin);

    // pull out just the frequency band to be searched
    double[,] band = MatrixTools.Submatrix(spectrogram.Data, 0, bottomBin, timeFrames - 1, topBin);

    //ii: DETECT HARMONICS
    // run the Xcorrelation-DCT harmonic detector over the band
    var detection = CrossCorrelation.DetectHarmonicsInSpectrogramData(band, decibelThreshold);
    double[] dBArray = detection.Item1;
    double[] harmonicIntensityScores = detection.Item2; //an array of formant intensity
    int[] maxIndexArray = detection.Item3;

    // the band width in bins is loop-invariant, so compute it once
    int bandBins = topBin - bottomBin + 1;

    for (int frame = 0; frame < timeFrames; frame++)
    {
        if (harmonicIntensityScores[frame] < dctThreshold)
        {
            continue;
        }

        // translate the dominant correlation index into a formant gap in Hertz
        // and discard frames whose gap lies outside the permitted range
        int peakIndex = maxIndexArray[frame];
        double gapInBins = 2 * bandBins / (double)peakIndex;
        double gapInHertz = gapInBins * hertzPerBin;
        bool gapIsAcceptable = gapInHertz >= minFormantGap && gapInHertz <= maxFormantGap;
        if (!gapIsAcceptable)
        {
            harmonicIntensityScores[frame] = 0.0;
        }
    }

    // smooth the score track so that brief gaps do not split events
    harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 3);

    // threshold the smoothed scores into events
    // Note: This method does NOT do prior smoothing of the score array.
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        harmonicIntensityScores,
        minHz,
        maxHz,
        spectrogram.FramesPerSecond,
        spectrogram.FBinWidth,
        dctThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // give every event a placeholder identity; callers may rename them later
    foreach (var ev in acousticEvents)
    {
        ev.SpeciesName = "NoName";
        ev.Name = "Harmonics";
    }

    return (acousticEvents, dBArray, harmonicIntensityScores);
}
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD.
        /// Detects Litoria bicolor calls: converts the recording to a spectrogram, computes a
        /// lower-band-minus-upper-band amplitude score, extracts candidate events from that score,
        /// then confirms each candidate by scanning it with a DCT for oscillations whose period
        /// lies within the configured bounds.
        /// </summary>
        /// <param name="recording">The audio segment to analyse; returns null if this is null.</param>
        /// <param name="sonoConfig">Configuration used to build the analysis spectrogram.</param>
        /// <param name="lbConfig">Species-specific parameters (bands, thresholds, period bounds).</param>
        /// <param name="drawDebugImage">If true, a debug image of scores and events is produced.</param>
        /// <param name="segmentStartOffset">Start time of this segment within the whole recording.</param>
        /// <returns>Tuple of (display sonogram, hits matrix (always null here), score array, confirmed events, debug image or null).</returns>
        public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaBicolorConfig lbConfig,
            bool drawDebugImage,
            TimeSpan segmentStartOffset)
        {
            double decibelThreshold = lbConfig.DecibelThreshold; //dB
            double intensityThreshold = lbConfig.IntensityThreshold;
            //double eventThreshold = lbConfig.EventThreshold; //in 0-1

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return (null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            // NOTE(review): framesPerSecond is set equal to freqBinWidth; this equality only holds
            // for zero frame overlap (frames/sec = sr/windowSize = hz/bin) - confirm sonoConfig has no overlap.
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 3 * lbConfig.MaxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            // Convert the band edges (Hz) to spectrogram bin indices (+1 skips the DC bin).
            int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);
            int colCount = sonogram.Data.GetLength(1);

            // Per-frame average energy in each band.
            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            // Score favours frames where both bands are strong AND similar (sum minus difference).
            double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

            // Could smooth here rather than above. Above seemed slightly better?
            amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lbConfig.LowerBandMinHz,
                lbConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                lbConfig.MinDuration,
                lbConfig.MaxDuration,
                segmentStartOffset);

            // Zero out weak difference scores so the DCT only sees meaningful oscillation.
            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // init the score array
            double[] scores = new double[rowCount];

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List<AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedEvents)
            {
                //rowtop, rowWidth
                int eventStart = ae.Oblong.RowTop;
                int eventWidth = ae.Oblong.RowWidth;
                int step = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                // NOTE(review): i starts at eventStart - dctLength/2 and can be negative for events
                // near the segment start; scores[i + j] below could then index out of range -
                // verify DataTools.Subarray / event placement guarantees i >= 0 here.
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    double oscilFreq;
                    double period;
                    double intensity;
                    Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);
                    bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;
                    //Console.WriteLine($"step={i} period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name = "L.b";
                    ae.Score_MaxInEvent = maximumIntensity;
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // calculate the cosine similarity scores
            var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            Image debugImage = null;
            if (drawDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double normalisedThreshold;
                //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
                DataTools.Normalise(amplitudeScores, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);
                var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };

                // other debug plots
                //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
                debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
            return (Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD for TRILLS.
        ///
        /// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
        /// Detects Litoria watjulum calls in two passes over the same band-difference score:
        /// (1) trill events, confirmed by a DCT oscillation scan; (2) brief "tink" events,
        /// accepted with a stronger decibel threshold and no oscillation check.
        /// </summary>
        /// <param name="recording">The audio segment to analyse; returns null if this is null.</param>
        /// <param name="sonoConfig">Configuration used to build the analysis spectrogram.</param>
        /// <param name="lwConfig">Species-specific parameters (bands, thresholds, durations, period bounds, profile names).</param>
        /// <param name="returnDebugImage">If true, a debug image of scores and events is produced.</param>
        /// <param name="segmentStartOffset">Start time of this segment within the whole recording.</param>
        /// <returns>Tuple of (display sonogram, hits matrix (always null here), score array, confirmed events, debug image or null).</returns>
        private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaWatjulumConfig lwConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            double intensityThreshold = lwConfig.IntensityThreshold;
            double minDuration = lwConfig.MinDurationOfTrill; // seconds
            double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
            double minPeriod = lwConfig.MinPeriod; // seconds
            double maxPeriod = lwConfig.MaxPeriod; // seconds

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return (null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            // NOTE(review): framesPerSecond is set equal to freqBinWidth; this equality only holds
            // for zero frame overlap (frames/sec = sr/windowSize = hz/bin) - confirm sonoConfig has no overlap.
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 4 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            // Convert the band edges (Hz) to spectrogram bin indices (+1 skips the DC bin).
            int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);
            //int colCount = sonogram.Data.GetLength(1);

            // Per-frame average energy in each band.
            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            // Score favours frames where both bands are strong AND similar (sum minus difference).
            double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

            // Could smooth here rather than above. Above seemed slightly better?
            //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
            var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                lwConfig.DecibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // Zero out weak difference scores so the DCT only sees meaningful oscillation.
            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // LOOK FOR TRILL EVENTS
            // init the score array
            double[] scores = new double[rowCount];

            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List<AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedTrillEvents)
            {
                int eventStart = ae.Oblong.RowTop;
                int eventWidth = ae.Oblong.RowWidth;
                int step = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                // NOTE(review): i starts at eventStart - dctLength/2 and can be negative for events
                // near the segment start; scores[i + j] below could then index out of range -
                // verify DataTools.Subarray / event placement guarantees i >= 0 here.
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
                    bool periodWithinBounds = period > minPeriod && period < maxPeriod;
                    //Console.WriteLine($"step={i} period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
                    ae.Score_MaxInEvent = maximumIntensity;
                    ae.Profile = lwConfig.ProfileNames[0];
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // LOOK FOR TINK EVENTS
            // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
            double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

            // want stronger threshold for tink because brief.
            double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
            var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                tinkDecibelThreshold,
                minDurationOfTink,
                maxDurationOfTink,
                segmentStartOffset);

            // Tinks are accepted without an oscillation check - they are too brief for the DCT.
            foreach (var ae2 in predictedTinkEvents)
            {
                // Prune the list of potential acoustic events, for example using Cosine Similarity.
                //rowtop, rowWidth
                //int eventStart = ae2.Oblong.RowTop;
                //int eventWidth = ae2.Oblong.RowWidth;
                //int step = 2;
                //double maximumIntensity = 0.0;

                // add abbreviatedSpeciesName into event
                //if (maximumIntensity >= intensityThreshold)
                //{
                ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

                //ae2.Score_MaxInEvent = maximumIntensity;
                ae2.Profile = lwConfig.ProfileNames[1];
                confirmedEvents.Add(ae2);

                //}
            }

            //######################################################################

            var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
            Image debugImage = null;
            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);
                var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
                debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
            return (Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
/// <summary> /// Do your analysis. This method is called once per segment (typically one-minute segments). /// </summary> /// <param name="recording"></param> /// <param name="configuration"></param> /// <param name="segmentStartOffset"></param> /// <param name="getSpectralIndexes"></param> /// <param name="outputDirectory"></param> /// <param name="imageWidth"></param> /// <returns></returns> public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth) { string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>"; string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>"; int minHz = configuration.GetInt(AnalysisKeys.MinHz); int maxHz = configuration.GetInt(AnalysisKeys.MaxHz); // BETTER TO CALCULATE THIS. IGNORE USER! // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]); // duration of DCT in seconds double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration); // minimum acceptable value of a DCT coefficient double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold); // ignore oscillations below this threshold freq int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq); // ignore oscillations above this threshold freq int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq); // min duration of event in seconds double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration); // max duration of event in seconds double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration); // min score for an acceptable event double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold); if (recording.WavReader.SampleRate != 22050) { throw new InvalidOperationException("Requires a 22050Hz file"); } // The default was 512 for Canetoad. 
// Framesize = 128 seems to work for Littoria fallax. const int FrameSize = 128; double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap( recording.SampleRate, FrameSize, maxOscilFreq); //windowOverlap = 0.75; // previous default // i: MAKE SONOGRAM var sonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = FrameSize, WindowOverlap = windowOverlap, //NoiseReductionType = NoiseReductionType.NONE, NoiseReductionType = NoiseReductionType.Standard, NoiseReductionParameter = 0.1, }; // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD"); TimeSpan recordingDuration = recording.Duration; int sr = recording.SampleRate; double freqBinWidth = sr / (double)sonoConfig.WindowSize; BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); int rowCount = sonogram.Data.GetLength(0); int colCount = sonogram.Data.GetLength(1); // double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, (rowCount - 1), maxbin); // ###################################################################### // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER // This window is used to smooth the score array before extracting events. // A short window (e.g. 3) preserves sharper score edges to define events but also keeps noise. 
int scoreSmoothingWindow = 13; double[] scores; // predefinition of score array List <AcousticEvent> acousticEvents; double[,] hits; Oscillations2012.Execute( (SpectrogramStandard)sonogram, minHz, maxHz, dctDuration, minOscilFreq, maxOscilFreq, dctThreshold, eventThreshold, minDuration, maxDuration, scoreSmoothingWindow, out scores, out acousticEvents, out hits, segmentStartOffset); acousticEvents.ForEach(ae => { ae.SpeciesName = speciesName; ae.SegmentDurationSeconds = recordingDuration.TotalSeconds; ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds; ae.Name = abbreviatedSpeciesName; }); var plot = new Plot(this.DisplayName, scores, eventThreshold); var plots = new List <Plot> { plot }; this.WriteDebugImage(recording, outputDirectory, sonogram, acousticEvents, plots, hits); return(new RecognizerResults() { Sonogram = sonogram, Hits = hits, Plots = plots, Events = acousticEvents, }); }
/// <summary> /// Do your analysis. This method is called once per segment (typically one-minute segments). /// </summary> /// <param name="recording"></param> /// <param name="configuration"></param> /// <param name="segmentStartOffset"></param> /// <param name="getSpectralIndexes"></param> /// <param name="outputDirectory"></param> /// <param name="imageWidth"></param> /// <returns></returns> public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth) { // common properties var speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>"; var abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>"; int minHz = configuration.GetInt(AnalysisKeys.MinHz); int maxHz = configuration.GetInt(AnalysisKeys.MaxHz); // BETTER TO CALCULATE THIS. IGNORE USER! // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]); // duration of DCT in seconds double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration); // minimum acceptable value of a DCT coefficient double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold); // ignore oscillations below this threshold freq int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq); // ignore oscillations above this threshold freq int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq); // min duration of event in seconds double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration); // max duration of event in seconds double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration); // min score for an acceptable event double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold); // this default framesize seems to work for Canetoad const int frameSize = 512; double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap( recording.SampleRate, 
frameSize, maxOscilFreq); //windowOverlap = 0.75; // previous default // DEBUG: Following line used to search for where indeterminism creeps into the spectrogram values which vary from run to run. //FileTools.AddArrayAdjacentToExistingArrays(Path.Combine(outputDirectory.FullName, recording.BaseName+"_RecordingSamples.csv"), recording.WavReader.GetChannel(0)); // i: MAKE SONOGRAM var sonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = frameSize, WindowOverlap = windowOverlap, // the default window is HAMMING //WindowFunction = WindowFunctions.HANNING.ToString(), //WindowFunction = WindowFunctions.NONE.ToString(), // if do not use noise reduction can get a more sensitive recogniser. NoiseReductionType = NoiseReductionType.None, }; // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD"); TimeSpan recordingDuration = recording.Duration; //int sr = recording.SampleRate; //double freqBinWidth = sr / (double)sonoConfig.WindowSize; /* ############################################################################################################################################# * window sr frameDuration frames/sec hz/bin 64frameDuration hz/64bins hz/128bins * 1024 22050 46.4ms 21.5 21.5 2944ms 1376hz 2752hz * 1024 17640 58.0ms 17.2 17.2 3715ms 1100hz 2200hz * 2048 17640 116.1ms 8.6 8.6 7430ms 551hz 1100hz */ // int minBin = (int)Math.Round(minHz / freqBinWidth) + 1; // int maxbin = minBin + numberOfBins - 1; BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); //int rowCount = sonogram.Data.GetLength(0); //int colCount = sonogram.Data.GetLength(1); // DEBUG: Following lines used to search for where indeterminism creeps into the spectrogram values which vary from run to run. 
//double[] array = DataTools.Matrix2Array(sonogram.Data); //FileTools.AddArrayAdjacentToExistingArrays(Path.Combine(outputDirectory.FullName, recording.BaseName+".csv"), array); // ###################################################################### // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER double minDurationOfAdvertCall = minDuration; // this boundary duration should = 5.0 seconds as of 4 June 2015. //double minDurationOfReleaseCall = 1.0; double[] scores; // predefinition of score array List <AcousticEvent> events; double[,] hits; Oscillations2012.Execute( (SpectrogramStandard)sonogram, minHz, maxHz, dctDuration, minOscilFreq, maxOscilFreq, dctThreshold, eventThreshold, minDurationOfAdvertCall, maxDuration, out scores, out events, out hits, segmentStartOffset); // DEBUG: Following line used to search for where indeterminism creeps into the event detection //FileTools.AddArrayAdjacentToExistingArrays(Path.Combine(outputDirectory.FullName, recording.BaseName+"_ScoreArray.csv"), scores); var prunedEvents = new List <AcousticEvent>(); foreach (AcousticEvent ae in events) { //if (ae.Duration < minDurationOfReleaseCall) { continue; } if (ae.EventDurationSeconds < minDurationOfAdvertCall) { continue; } if (ae.EventDurationSeconds > maxDuration) { continue; } // add additional info ae.SpeciesName = speciesName; ae.Name = abbreviatedSpeciesName; ae.SegmentDurationSeconds = recordingDuration.TotalSeconds; ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds; prunedEvents.Add(ae); //if (ae.Duration >= minDurationOfAdvertCall) //{ // ae.Name = abbreviatedSpeciesName; // + ".AdvertCall"; // prunedEvents.Add(ae); // continue; //} } // do a recognizer test. 
if (false) { if (MainEntry.InDEBUG) { RecognizerTest(scores, new FileInfo(recording.FilePath)); RecognizerTest(prunedEvents, new FileInfo(recording.FilePath)); } } var plot = new Plot(this.DisplayName, scores, eventThreshold); return(new RecognizerResults() { Sonogram = sonogram, Hits = hits, Plots = plot.AsList(), Events = prunedEvents, //Events = events }); }