/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Strategy: (1) score frames whose spectral maximum falls within +/-150 Hz of the configured
/// dominant frequency ("croak" candidates); (2) convert those scores to candidate croak events;
/// (3) run a DCT oscillation detector over the croak-score array, because the target call is a
/// train of croaks repeated at roughly 0.6 second intervals.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="configuration">Config supplying species name and recognizer parameters.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Lazily computed spectral indices; not used by this recognizer.</param>
/// <param name="outputDirectory">Directory where the debug spectrogram image is written.</param>
/// <param name="imageWidth">Requested image width; not used by this recognizer.</param>
/// <returns>Recognizer results containing the sonogram, score plots and detected events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaCaeruleaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // common properties
    // NOTE: the unused local abbreviatedSpeciesName was removed; events below use
    // recognizerConfig.AbbreviatedSpeciesName instead.
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";

    // BETTER TO SET THESE. IGNORE USER!
    // This framesize is large because the oscillation we wish to detect is due to repeated croaks
    // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
    const int frameSize = 2048;
    const double windowOverlap = 0.5;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // use the default HAMMING window
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.None
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));

    //int dominantFreqBin = (int)Math.Round(recognizerConfig.DominantFreq / freqBinWidth) + 1;
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 9.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // get the freq band as set by min and max Herz
    var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
    // Skip over spectra where maximum is not in correct location.
    int buffer = 150;
    var croakScoreArray = new double[rowCount];
    var hzAtTopOfTopBand = recognizerConfig.DominantFreq + buffer;
    var hzAtBotOfTopBand = recognizerConfig.DominantFreq - buffer;
    var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
    var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

    // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
    for (int x = 0; x < rowCount; x++)
    {
        // extract spectrum
        var spectrum = MatrixTools.GetRow(frogBand, x);
        int maxIndex = DataTools.GetMaxIndex(spectrum);
        if (spectrum[maxIndex] < decibelThreshold)
        {
            continue;
        }

        if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
        {
            croakScoreArray[x] = spectrum[maxIndex];
        }
    }

    // Prepare a normalised plot for later display with spectrogram
    double[] normalisedScores;
    double normalisedThreshold;
    DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);

    // FIX: interpolated strings need no string.Format() wrapper (redundant-format defect).
    var text1 = $"Croak scores (threshold={decibelThreshold})";
    var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

    // extract potential croak events from the array of croak candidates
    var croakEvents = AcousticEvent.ConvertScoreArray2Events(
        croakScoreArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinCroakDuration,
        recognizerConfig.MaxCroakDuration,
        segmentStartOffset);

    // add necessary info into the candidate events
    var prunedEvents = new List<AcousticEvent>();
    foreach (var ae in croakEvents)
    {
        // add additional info
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // With those events that survive the above Array2Events process, we now extract a new array of croak scores
    croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
    DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var text2 = $"Croak events (threshold={decibelThreshold})";
    var croakPlot2 = new Plot(text2, normalisedScores, normalisedThreshold);

    // Look for oscillations in the difference array
    //croakScoreArray = DataTools.filterMovingAverageOdd(croakScoreArray, 5);

    // duration of DCT in seconds
    double dctDuration = recognizerConfig.DctDuration;

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = recognizerConfig.DctThreshold;

    // convert configured periods (seconds) to oscillation rates (Hz)
    double minOscRate = 1 / recognizerConfig.MaxPeriod;
    double maxOscRate = 1 / recognizerConfig.MinPeriod;
    var dctScores = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

    // ######################################################################
    // iii: CONVERT OSCILLATION SCORES TO ACOUSTIC EVENTS
    var events = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    double[,] hits = null;
    prunedEvents = new List<AcousticEvent>();
    foreach (var ae in events)
    {
        // add additional info
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // do a recognizer test.
    if (MainEntry.InDEBUG)
    {
        //TestTools.RecognizerScoresTest(scores, new FileInfo(recording.FilePath));
        //AcousticEvent.TestToCompareEvents(prunedEvents, new FileInfo(recording.FilePath));
    }

    var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

    // debug display is currently always on (deliberate developer toggle)
    if (true)
    {
        // display a variety of debug score arrays

        // calculate amplitude at location
        double[] amplitudeArray = MatrixTools.SumRows(frogBand);
        DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scoresPlot, croakPlot2, croakPlot1, amplPlot };

        // NOTE: This DrawDebugImage() method can be over-written in this class.
        var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
        var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
        debugImage.Save(debugPath);
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = scoresPlot.AsList(),
        Events = prunedEvents,

        //Events = events
    };
}
/// <summary>
/// ################ THE KEY ANALYSIS METHOD.
/// Cross-correlates two spectrogram frequency-bin tracks (around 1500 Hz) over short samples,
/// scoring frames whose cross-correlation spectrum peaks at a period within the configured range.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment file to analyse.</param>
/// <param name="configDict">String-valued config: INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION, MIN_PERIOD, MAX_PERIOD.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>Tuple of (sonogram, hits matrix, plots, predicted events, recording duration).</returns>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values - ignore those set by user
    int frameSize = 128;
    double windowOverlap = 0.5;

    // NOTE(review): double.Parse uses the current culture; values like "0.5" would fail under
    // comma-decimal locales. Consider CultureInfo.InvariantCulture - confirm with callers/config source.
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"]); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"]); // seconds
    double minPeriod = double.Parse(configDict["MIN_PERIOD"]); // seconds
    double maxPeriod = double.Parse(configDict["MAX_PERIOD"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    }; //default values config

    //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz      2752hz
    // 256      17640       14.5ms          68.9        68.9    ms              hz          hz
    // 512      17640       29.0ms          34.4        34.4    ms              hz          hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz      2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz      1100hz

    //The Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    // Assuming sr=17640 and window=256, then binWidth = 68.9Hz and 1500Hz = bin 21.7..
    // Therefore do a Xcorrelation between bins 21 and 22.
    // Number of frames to span must power of 2. Try 16 frames which covers 232ms - almost 1/4 second.
    int midHz = 1500;
    int lowerBin = (int)(midHz / freqBinWidth) + 1; //because bin[0] = DC
    int upperBin = lowerBin + 4;
    int lowerHz = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
    int upperHz = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
    double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
    lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB #######################################################################
    upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB #######################################################################

    // FIX: old comment claimed "one/tenth second steps"; framesPerSecond/40 gives 1/40th-second steps.
    int step = (int)(framesPerSecond / 40);

    // FIX: guard against step == 0 (possible at low frame rates), which previously caused a
    // DivideByZeroException at the stepCount calculation below.
    if (step < 1)
    {
        step = 1;
    }

    int stepCount = rowCount / step;
    int sampleLength = 32; // FIX: old comment said "16 frames"; the sample is 32 frames (~1/4 second at 128-frame windows).
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
        if (lowerSubarray == null || upperSubarray == null)
        {
            break;
        }

        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 2;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        int maxId = DataTools.GetMaxIndex(spectrum);
        double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 3);
    intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5 #######################################################################
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerHz,
        upperHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray, segmentStartOffset);
    var hits = new double[rowCount, colCount];

    var plots = new List<Plot>();

    //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
    //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
    //plots.Add(new Plot("lowerArray", DataTools.NormaliseMatrixValues(lowerArray), 0.25));
    //plots.Add(new Plot("upperArray", DataTools.NormaliseMatrixValues(upperArray), 0.25));
    //plots.Add(new Plot("intensity", DataTools.NormaliseMatrixValues(intensity), intensityThreshold));
    plots.Add(new Plot("intensity", intensity, intensityThreshold));

    return Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration);
} //Analysis()
} //Analysis()

/// <summary>
/// Scans a single frequency band of the sonogram for periodic structure by cross-correlating
/// the band-average array with itself, then converts the intensity scores to acoustic events
/// and attaches several per-event sub-scores (intensity, period delta, bandwidth, peaks).
/// </summary>
/// <param name="sonogram">The spectrogram to scan.</param>
/// <param name="minHz">Bottom of the search band.</param>
/// <param name="maxHz">Top of the search band (events are labelled with this; the scanned band is minBin..minBin+130 bins).</param>
/// <param name="minPeriod">Minimum acceptable call period in seconds.</param>
/// <param name="maxPeriod">Maximum acceptable call period in seconds.</param>
/// <param name="eventThreshold">Score threshold for converting the intensity array to events.</param>
/// <param name="minDuration">Minimum event duration in seconds.</param>
/// <param name="maxDuration">Maximum event duration in seconds.</param>
/// <returns>Tuple of (score arrays for display, hits matrix, detected events).</returns>
public static System.Tuple<List<double[]>, double[,], List<AcousticEvent>> DetectKiwi(BaseSonogram sonogram, int minHz, int maxHz, /* double dctDuration, double dctThreshold, */ double minPeriod, double maxPeriod, double eventThreshold, double minDuration, double maxDuration)
{
    int step = (int)Math.Round(sonogram.FramesPerSecond); //take one second steps
    int sampleLength = 32; //32 frames = 1.85 seconds. 64 frames (i.e. 3.7 seconds) is too long a sample - require stationarity.

    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);
    double minFramePeriod = minPeriod * sonogram.FramesPerSecond;
    double maxFramePeriod = maxPeriod * sonogram.FramesPerSecond;

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz      2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz      2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz      1100hz

    // average the rows over the full scanned band (minBin .. minBin+130), then
    // auto-correlate that single track against itself to expose periodicity.
    double[] fullArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, (rowCount - 1), minBin + 130);
    var result1 = CrossCorrelation.DetectXcorrelationInTwoArrays(fullArray, fullArray, step, sampleLength, minFramePeriod, maxFramePeriod);
    double[] intensity1 = result1.Item1;
    double[] periodicity1 = result1.Item2;
    intensity1 = DataTools.filterMovingAverage(intensity1, 11);

    //#############################################################################################################################################
    //double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, (rowCount - 1), minBin + 65);
    //double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin+66, (rowCount - 1), minBin+130);
    //int actualMaxHz = (int)Math.Round((minBin+130) * sonogram.FBinWidth);
    //var result2 = CrossCorrelation.DetectXcorrelationInTwoArrays(lowerArray, upperArray, step, sampleLength, minFramePeriod, maxFramePeriod);
    //double[] intensity2 = result2.Item1;
    //double[] periodicity2 = result2.Item2;
    //intensity2 = DataTools.filterMovingAverage(intensity2, 5);

    //#############################################################################################################################################
    //minFramePeriod = 4;
    //maxFramePeriod = 14;
    //var return3 = Gratings.ScanArrayForGratingPattern(fullArray, (int)minFramePeriod, (int)maxFramePeriod, 4, step);
    //var return3 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 4);
    //var return4 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 5);
    //var return5 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 8);
    //var return6 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 10);
    //var return7 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 12);

    //#############################################################################################################################################
    //bool normaliseDCT = true;
    //Double[,] maleHits;              //predefinition of hits matrix - to superimpose on sonogram image
    //double[] maleScores;             //predefinition of score array
    //double[] maleOscRate;
    //List<AcousticEvent> predictedMaleEvents;
    //double minOscilFreq = 1 / maxPeriod; //convert max period (seconds) to oscilation rate (Herz).
    //double maxOscilFreq = 1 / minPeriod; //convert min period (seconds) to oscilation rate (Herz).
    //OscillationAnalysis.Execute((SpectralSonogram)sonogram, minHz, maxHz, dctDuration, dctThreshold, normaliseDCT,
    //                            minOscilFreq, maxOscilFreq, eventThreshold, minDuration, maxDuration,
    //                            out maleScores, out predictedMaleEvents, out maleHits, out maleOscRate);

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    List<AcousticEvent> events = AcousticEvent.ConvertScoreArray2Events(intensity1, minHz, maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, eventThreshold, minDuration, maxDuration);
    CropEvents(events, fullArray, minDuration);
    CalculateAvIntensityScore(events, intensity1);
    CalculateDeltaPeriodScore(events, periodicity1, minFramePeriod, maxFramePeriod);
    CalculateBandWidthScore(events, sonogram.Data);
    CalculatePeaksScore(events, fullArray);

    //FilterEvents(events);
    CalculateWeightedEventScore(events);

    // PREPARE HITS MATRIX
    // Wherever intensity exceeds the event threshold, paint the band's bins with the
    // normalised periodicity value ((period - min) / range) for display.
    var hits = new double[rowCount, colCount];
    double range = maxFramePeriod - minFramePeriod;
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity1[r] > eventThreshold)
        {
            for (int c = minBin; c < maxBin; c++)
            {
                hits[r, c] = (periodicity1[r] - minFramePeriod) / range; //normalisation
            }
        }
    }

    periodicity1 = CropArrayToEvents(events, periodicity1); //for display only

    var scores = new List<double[]>();
    scores.Add(DataTools.normalise(fullArray));

    //scores.Add(DataTools.normalise(upperArray));
    //scores.Add(DataTools.normalise(lowerArray));
    scores.Add(DataTools.normalise(intensity1));
    scores.Add(DataTools.normalise(periodicity1));

    //scores.Add(DataTools.normalise(intensity2));
    //scores.Add(DataTools.normalise(return3));
    //scores.Add(DataTools.normalise(return4));
    //scores.Add(DataTools.normalise(return5));
    //scores.Add(DataTools.normalise(return6));
    //scores.Add(DataTools.normalise(return7));
    //scores.Add(DataTools.normalise(maleScores));
    //scores.Add(DataTools.normalise(maleOscRate));
    return System.Tuple.Create(scores, hits, events);
}
} //Analysis()

/// <summary>
/// Detects harmonic (formant-like) structure in a 32-bin band above minHz by looking for
/// "bars" (periodic ridges) in the rows of the amplitude spectrogram, then converts the
/// smoothed scores to acoustic events and rebuilds a sonogram for display.
/// </summary>
/// <param name="recording">The audio recording to analyse.</param>
/// <param name="intensityThreshold">Threshold applied both to bar detection and event extraction.</param>
/// <param name="minHz">Bottom of the scanned band; the band spans 32 bins above this.</param>
/// <param name="minFormantgap">Minimum acceptable formant gap in Hz.</param>
/// <param name="maxFormantgap">Maximum acceptable formant gap in Hz.</param>
/// <param name="minDuration">Minimum event duration in seconds.</param>
/// <param name="windowSize">FFT window size in samples.</param>
/// <param name="windowOverlap">Fractional overlap of consecutive windows.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>Tuple of (sonogram, hits matrix - always null on return, score array, predicted events).</returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> DetectHarmonics(
    AudioRecording recording,
    double intensityThreshold,
    int minHz,
    int minFormantgap,
    int maxFormantgap,
    double minDuration,
    int windowSize,
    double windowOverlap,
    TimeSpan segmentStartOffset)
{
    //i: MAKE SONOGRAM
    int numberOfBins = 32;
    double binWidth = recording.SampleRate / (double)windowSize;
    int sr = recording.SampleRate;
    double frameDuration = windowSize / (double)sr; // Duration of full frame or window in seconds
    double frameOffset = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
    double framesPerSecond = 1 / frameOffset;

    //double framesPerSecond = sr / (double)windowSize;
    //int frameOffset = (int)(windowSize * (1 - overlap));
    //int frameCount = (length - windowSize + frameOffset) / frameOffset;

    // epsilon = smallest representable sample value for this bit depth
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples,
        sr,
        epsilon,
        windowSize,
        windowOverlap);
    double[] avAbsolute = results2.Average; //average absolute value over the minute recording

    //double[] envelope = results2.Item2;
    double[,] matrix = results2.AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    double windowPower = results2.WindowPower;

    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz      2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz      2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz      1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int minBin = (int)Math.Round(minHz / binWidth);
    int maxHz = (int)Math.Round(minHz + (numberOfBins * binWidth));

    int rowCount = matrix.GetLength(0);
    int colCount = matrix.GetLength(1);
    int maxbin = minBin + numberOfBins;

    // skip the DC-adjacent column: start at minBin + 1
    double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1; //an array of periodicity scores
    double[] periodicity = results.Item2;

    //transfer periodicity info to a hits matrix.
    //intensity = DataTools.filterMovingAverage(intensity, 3);
    double[] scoreArray = new double[intensity.Length];
    var hits = new double[rowCount, colCount];
    for (int r = 0; r < rowCount; r++)
    {
        double relativePeriod = periodicity[r] / numberOfBins / 2;
        if (intensity[r] > intensityThreshold)
        {
            for (int c = minBin; c < maxbin; c++)
            {
                hits[r, c] = relativePeriod;
            }
        }

        // keep only frames whose period (in Hz) lies inside the acceptable formant-gap range
        double herzPeriod = periodicity[r] * binWidth;
        if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
        {
            scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
        }
    }

    scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxDuration = 100000.0; //arbitrary long number - do not want to restrict duration of machine noise
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        framesPerSecond,
        binWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // NOTE(review): the hits matrix computed above is discarded here, so the returned tuple's
    // hits element is always null - confirm this is intentional before relying on it.
    hits = null;

    //set up the sonogram to return. Use the existing amplitude sonogram
    int bitsPerSample = recording.WavReader.BitsPerSample;
    TimeSpan duration = recording.Duration;
    NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
    var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
        recording.BaseName,
        windowSize,
        windowOverlap,
        bitsPerSample,
        windowPower,
        sr,
        duration,
        nrt,
        matrix);

    sonogram.DecibelsNormalised = new double[rowCount];

    //foreach frame or time step
    for (int i = 0; i < rowCount; i++)
    {
        sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
    }

    sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
    return Tuple.Create(sonogram, hits, scoreArray, predictedEvents);
} //end Execute_HDDetect
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Strategy: subtract the energy of flanking bands from the call band, find local maxima
/// ("peaks") in the difference array, then score peak density with a stimulus-decay function
/// before converting the score array to events.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="configuration">Config supplying recognizer parameters.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <param name="getSpectralIndexes">Lazily computed spectral indices; not used by this recognizer.</param>
/// <param name="outputDirectory">Directory where the debug spectrogram image would be written.</param>
/// <param name="imageWidth">Requested image width; not used by this recognizer.</param>
/// <returns>Recognizer results containing the sonogram, score plot and detected events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new CriniaRemotaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // BETTER TO SET THESE. IGNORE USER!
    // this default framesize seems to work
    const int frameSize = 256;
    const double windowOverlap = 0.25;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // use the default HAMMING window
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.None
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 6.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // average energy in the call band and in narrow bands just above and below it
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);
    double[] topBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, maxBin + 3, rowCount - 1, maxBin + 9);

    // FIX: column bounds were reversed (minBin - 3 .. minBin - 9, i.e. startCol > endCol);
    // a submatrix requires startCol <= endCol, as in the topBand line above.
    // Corrected to scan bins (minBin - 9) .. (minBin - 3).
    double[] botBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin - 9, rowCount - 1, minBin - 3);

    // call-band energy minus flanking-band energy; small/negative differences are zeroed
    double[] diffArray = new double[amplitudeArray.Length];
    for (int i = 0; i < amplitudeArray.Length; i++)
    {
        diffArray[i] = amplitudeArray[i] - topBand[i] - botBand[i];
        if (diffArray[i] < 1.0)
        {
            diffArray[i] = 0.0;
        }
    }

    // mark local maxima of the difference array that exceed the decibel threshold
    bool[] peakArray = new bool[amplitudeArray.Length];
    for (int i = 1; i < diffArray.Length - 1; i++)
    {
        if (diffArray[i] < decibelThreshold)
        {
            continue;
        }

        if (diffArray[i] > diffArray[i - 1] && diffArray[i] > diffArray[i + 1])
        {
            peakArray[i] = true;
        }
    }

    // calculate score array based on density of peaks
    double frameDuration = (double)frameSize / sr;

    // use a stimulus-decay function
    double durationOfDecayTail = 0.35; // seconds
    int lengthOfDecayTail = (int)Math.Round(durationOfDecayTail / frameDuration);
    double decayrate = 0.95;

    //double decay = -0.05;
    //double fractionalDecay = Math.Exp(decay * lengthOfDecayTail);
    // the above setting gives decay of 0.22 over 0.35 seconds or 30 frames.
    double score = 0.0;
    int locationOfLastPeak = 0;
    double[] peakScores = new double[amplitudeArray.Length];
    for (int p = 0; p < peakScores.Length - 1; p++)
    {
        if (!peakArray[p])
        {
            int distanceFromLastpeak = p - locationOfLastPeak;

            // score decay
            score *= decayrate;

            // remove the decay tail once it has become negligible and no recent peak exists
            if (score < 0.5 && distanceFromLastpeak > lengthOfDecayTail && p >= lengthOfDecayTail)
            {
                score = 0.0;
                for (int j = 0; j < lengthOfDecayTail; j++)
                {
                    peakScores[p - j] = score;
                }
            }
        }
        else
        {
            locationOfLastPeak = p;
            score += 0.8;
        }

        peakScores[p] = score;
    }

    var events = AcousticEvent.ConvertScoreArray2Events(
        peakScores,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);
    double[,] hits = null;
    var prunedEvents = new List<AcousticEvent>();
    foreach (var ae in events)
    {
        // discard events outside the acceptable duration range
        if (ae.EventDurationSeconds < recognizerConfig.MinDuration || ae.EventDurationSeconds > recognizerConfig.MaxDuration)
        {
            continue;
        }

        // add additional info
        ae.SpeciesName = recognizerConfig.SpeciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // do a recognizer test.
    if (MainEntry.InDEBUG)
    {
        // var testDir = new DirectoryInfo(outputDirectory.Parent.Parent.FullName);
        // TestTools.RecognizerScoresTest(recording.BaseName, testDir, recognizerConfig.AnalysisName, peakScores);
        // AcousticEvent.TestToCompareEvents(recording.BaseName, testDir, recognizerConfig.AnalysisName, prunedEvents);
    }

    var plot = new Plot(this.DisplayName, peakScores, recognizerConfig.EventThreshold);

    // debug display is currently disabled (deliberate developer toggle)
    if (false)
    {
        // display a variety of debug score arrays
        double[] normalisedScores;
        double normalisedThreshold;
        DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

        DataTools.Normalise(diffArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var diffPlot = new Plot("Diff plot", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { plot, amplPlot, diffPlot };

        // NOTE: This DrawDebugImage() method can be over-written in this class.
        var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
        var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
        debugImage.Save(debugPath);
    }

    return new RecognizerResults
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = prunedEvents,

        //Events = events
    };
} // Recognize()
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Cross-correlates the average-energy tracks of a lower and an upper frequency band in
/// one-second steps, scores frames whose correlation spectrum peaks at an acceptable period,
/// and converts the smoothed intensity array to acoustic events.
/// </summary>
/// <param name="recording">The audio segment to analyse; null aborts the analysis.</param>
/// <param name="sonoConfig">Sonogram configuration (window size is read from here).</param>
/// <param name="lrConfig">Lewins Rail recognizer parameters (bands, thresholds, durations, periods).</param>
/// <param name="returnDebugImage">When true, a debug image of score plots is also returned.</param>
/// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
/// <returns>Tuple of (sonogram, hits matrix, intensity scores, predicted events, debug image or null); null if recording is null.</returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LewinsRailConfig lrConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    int sr = recording.SampleRate;
    int upperBandMinHz = lrConfig.UpperBandMinHz;
    int upperBandMaxHz = lrConfig.UpperBandMaxHz;
    int lowerBandMinHz = lrConfig.LowerBandMinHz;
    int lowerBandMaxHz = lrConfig.LowerBandMaxHz;

    //double decibelThreshold = lrConfig.DecibelThreshold; //dB
    //int windowSize = lrConfig.WindowSize;
    double eventThreshold = lrConfig.EventThreshold; //in 0-1
    double minDuration = lrConfig.MinDuration; // seconds
    double maxDuration = lrConfig.MaxDuration; // seconds
    double minPeriod = lrConfig.MinPeriod; // seconds
    double maxPeriod = lrConfig.MaxPeriod; // seconds

    //double freqBinWidth = sr / (double)windowSize;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    //i: MAKE SONOGRAM
    // NOTE(review): framesPerSecond is set equal to freqBinWidth (= sr / windowSize). That
    // identity only holds when frame offset equals the window size (zero overlap) - confirm
    // the sonoConfig used by callers has no window overlap.
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    int step = (int)Math.Round(framesPerSecond); //take one second steps
    int stepCount = rowCount / step;
    int sampleLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // stop once a full-length sample can no longer be taken from the tail of the arrays
        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 3;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);
        double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    //######################################################################
    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerBandMinHz,
        upperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray, segmentStartOffset);
    var hits = new double[rowCount, colCount];

    //######################################################################
    var scorePlot = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var intensityPlot = new Plot("Intensity", normalisedScores, normalisedThreshold);
        DataTools.Normalise(periodicity, 10, out normalisedScores, out normalisedThreshold);
        var periodicityPlot = new Plot("Periodicity", normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { scorePlot, intensityPlot, periodicityPlot };
        debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
    }

    return Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage);
} //Analysis()
//#IntensityThreshold: 0.15
//# Event threshold - Determines FP / FN trade-off for events.
//EventThreshold: 0.2

/// <summary>
/// Detects frames containing harmonic stacks (formant-like structure) within a frequency
/// sub-band of the spectrogram, then converts the resulting score array into acoustic events.
/// Frames whose harmonic-intensity score falls below <paramref name="dctThreshold"/>, or whose
/// implied formant gap lies outside [minFormantGap, maxFormantGap] Hz, contribute a zero score.
/// </summary>
/// <param name="sonogram">the source spectrogram</param>
/// <param name="minHz">bottom of the search band (Hz)</param>
/// <param name="maxHz">top of the search band (Hz)</param>
/// <param name="nyquist">nyquist frequency of the recording (Hz)</param>
/// <param name="decibelThreshold">dB floor passed to the harmonic detector and used as the event-extraction threshold</param>
/// <param name="dctThreshold">minimum acceptable harmonic (DCT) intensity score for a frame</param>
/// <param name="minDuration">minimum acceptable event duration (seconds)</param>
/// <param name="maxDuration">maximum acceptable event duration (seconds)</param>
/// <param name="minFormantGap">minimum acceptable formant gap (Hz)</param>
/// <param name="maxFormantGap">maximum acceptable formant gap (Hz)</param>
/// <param name="segmentStartOffset">start time of this segment within the source recording</param>
/// <returns>the detected events and the smoothed per-frame harmonic-intensity scores</returns>
public static (List<AcousticEvent>, double[]) GetComponentsWithHarmonics(
    SpectrogramStandard sonogram,
    int minHz,
    int maxHz,
    int nyquist,
    double decibelThreshold,
    double dctThreshold,
    double minDuration,
    double maxDuration,
    int minFormantGap,
    int maxFormantGap,
    TimeSpan segmentStartOffset)
{
    var spectrogramMatrix = sonogram.Data;
    int totalFrames = spectrogramMatrix.GetLength(0);
    int totalBins = spectrogramMatrix.GetLength(1);
    double hertzPerBin = nyquist / (double)totalBins;
    int bottomBin = (int)Math.Round(minHz / hertzPerBin);
    int topBin = (int)Math.Round(maxHz / hertzPerBin);
    int bandBinCount = topBin - bottomBin + 1;

    // Extract the frequency sub-band to be searched.
    double[,] band = MatrixTools.Submatrix(sonogram.Data, 0, bottomBin, totalFrames - 1, topBin);

    // Detect harmonics in the search band using the Xcorrelation-DCT method.
    var detection = CrossCorrelation.DetectHarmonicsInSonogramMatrix(band, decibelThreshold);
    double[] dBArray = detection.Item1; // per-frame dB values (returned by the detector; not used below)
    double[] harmonicIntensityScores = detection.Item2; // per-frame formant-intensity scores
    int[] maxIndexArray = detection.Item3;

    // Zero out any above-threshold frame whose implied formant gap is out of range.
    for (int frame = 0; frame < totalFrames; frame++)
    {
        bool scoreTooLow = harmonicIntensityScores[frame] < dctThreshold;
        if (scoreTooLow)
        {
            continue;
        }

        int maxId = maxIndexArray[frame];
        double gapInBins = 2 * bandBinCount / (double)maxId;
        double gapInHertz = gapInBins * hertzPerBin;
        bool gapOutOfBounds = gapInHertz < minFormantGap || gapInHertz > maxFormantGap;
        if (gapOutOfBounds)
        {
            harmonicIntensityScores[frame] = 0.0;
        }
    }

    // Smooth the score array so that brief gaps do not split one call into several events.
    harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 5);

    // Extract the events based on duration bounds and threshold.
    // Note: ConvertScoreArray2Events does NOT do prior smoothing of the score array.
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        harmonicIntensityScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        sonogram.FBinWidth,
        decibelThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    return (acousticEvents, harmonicIntensityScores);
}
} // FELTWithBinaryTemplate()

/// <summary>
/// Scans a recording given a dictionary of parameters and a syntactic (SPR) template.
/// Template has a different orientation to others.
/// Pipeline: read parameters -> log template info -> segment the recording -> score frames
/// against the template -> clamp negative scores -> extract events -> adjust event locations.
/// NOTE(review): the decibel threshold parsed from the dictionary is immediately overwritten
/// with a hard-coded 4.0 dB below — confirm this override is still intended.
/// </summary>
/// <param name="sonogram">spectrogram of the recording to be scanned</param>
/// <param name="dict">algorithm parameters keyed by FeltTemplate_Create key constants</param>
/// <param name="templateMatrix">the syntactic template; rows correspond to time frames</param>
/// <param name="segmentStartOffset">start time of this segment within the source recording</param>
/// <returns>the sonogram, the list of template-matching events, and the per-frame score array</returns>
public static Tuple<SpectrogramStandard, List<AcousticEvent>, double[]> FELTWithSprTemplate(SpectrogramStandard sonogram, Dictionary<string, string> dict, char[,] templateMatrix, TimeSpan segmentStartOffset)
{
    //i: get parameters from dictionary
    string callName = dict[FeltTemplate_Create.key_CALL_NAME];
    bool doSegmentation = bool.Parse(dict[FeltTemplate_Create.key_DO_SEGMENTATION]);
    double smoothWindow = double.Parse(dict[FeltTemplate_Create.key_SMOOTH_WINDOW]); //smoothing window (seconds) applied before segmentation
    int minHz = int.Parse(dict[FeltTemplate_Create.key_MIN_HZ]);
    int maxHz = int.Parse(dict[FeltTemplate_Create.key_MAX_HZ]);
    double minDuration = double.Parse(dict[FeltTemplate_Create.key_MIN_DURATION]); //min duration of event in seconds
    double dBThreshold = double.Parse(dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD]); // = 9.0; // dB threshold
    dBThreshold = 4.0; // NOTE(review): hard-coded override of the configured threshold — see summary
    int binCount = (int)(maxHz / sonogram.FBinWidth) - (int)(minHz / sonogram.FBinWidth) + 1;
    Log.WriteLine("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);

    //ii: TEMPLATE INFO
    double templateDuration = templateMatrix.GetLength(0) / sonogram.FramesPerSecond;
    Log.WriteIfVerbose("Template duration = {0:f3} seconds or {1} frames.", templateDuration, templateMatrix.GetLength(0));
    Log.WriteIfVerbose("Min Duration: " + minDuration + " seconds");

    //iii: DO SEGMENTATION
    double segmentationThreshold = 2.0; // Standard deviations above background noise
    double maxDuration = double.MaxValue; // Do not constrain maximum length of events.
    var tuple1 = AcousticEvent.GetSegmentationEvents((SpectrogramStandard)sonogram, doSegmentation, segmentStartOffset, minHz, maxHz, smoothWindow, segmentationThreshold, minDuration, maxDuration);
    var segmentEvents = tuple1.Item1;

    //iv: Score sonogram for events matching template
    //#############################################################################################################################################
    var tuple2 = FindMatchingEvents.Execute_Spr_Match(templateMatrix, sonogram, segmentEvents, minHz, maxHz, dBThreshold);
    // Alternative matching strategies kept for reference:
    //var tuple2 = FindMatchingEvents.Execute_StewartGage(target, dynamicRange, (SpectralSonogram)sonogram, segmentEvents, minHz, maxHz, minDuration);
    //var tuple2 = FindMatchingEvents.Execute_SobelEdges(target, dynamicRange, (SpectralSonogram)sonogram, segmentEvents, minHz, maxHz, minDuration);
    //var tuple2 = FindMatchingEvents.Execute_MFCC_XCOR(target, dynamicRange, sonogram, segmentEvents, minHz, maxHz, minDuration);
    var scores = tuple2.Item1;
    //#############################################################################################################################################

    //v: PROCESS SCORE ARRAY
    //scores = DataTools.filterMovingAverage(scores, 3);
    LoggedConsole.WriteLine("Scores: min={0:f4}, max={1:f4}, threshold={2:f2}dB", scores.Min(), scores.Max(), dBThreshold);

    // Clamp negative scores to zero: Set (scores < 0.0) = 0.0;
    for (int i = 0; i < scores.Length; i++)
    {
        if (scores[i] < 0.0)
        {
            scores[i] = 0.0;
        }
    }

    //vi: EXTRACT EVENTS
    List<AcousticEvent> matchEvents = AcousticEvent.ConvertScoreArray2Events(scores, minHz, maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, dBThreshold, minDuration, maxDuration, segmentStartOffset);
    foreach (AcousticEvent ev in matchEvents)
    {
        ev.FileName = sonogram.Configuration.SourceFName;
        ev.Name = sonogram.Configuration.CallName;
    }

    // Edit the events to correct the start time, duration and end of events to match the max score and length of the template.
    AdjustEventLocation(matchEvents, callName, templateDuration, sonogram.Duration.TotalSeconds);

    return (Tuple.Create(sonogram, matchEvents, scores));
} // FELTWithSprTemplate()
/// <summary>
/// Calculates the mean intensity in a freq band defined by its min and max freq.
/// This method averages dB log values incorrectly but it is faster than doing many log conversions.
/// This method is used to find acoustic events and is accurate enough for the purpose.
/// For each bin in the band, a three-bin band intensity is computed and the side-band
/// intensity (three bins away) is subtracted, so that narrow-band whistles stand out
/// against broadband energy. Per-bin events are then extracted and overlapping events combined.
/// </summary>
/// <param name="sonogram">the source spectrogram</param>
/// <param name="minHz">bottom of the search band (Hz)</param>
/// <param name="maxHz">top of the search band (Hz)</param>
/// <param name="nyquist">nyquist frequency of the recording (Hz)</param>
/// <param name="decibelThreshold">score threshold for event extraction</param>
/// <param name="minDuration">minimum acceptable event duration (seconds)</param>
/// <param name="maxDuration">maximum acceptable event duration (seconds)</param>
/// <param name="segmentStartOffset">start time of this segment within the source recording</param>
/// <returns>the combined event list and the maximum per-frame intensity over all scanned bins</returns>
public static (List<AcousticEvent>, double[]) GetWhistles(
    SpectrogramStandard sonogram, int minHz, int maxHz, int nyquist, double decibelThreshold, double minDuration, double maxDuration, TimeSpan segmentStartOffset)
{
    var sonogramData = sonogram.Data;
    int frameCount = sonogramData.GetLength(0);
    int binCount = sonogramData.GetLength(1);
    double binWidth = nyquist / (double)binCount;
    int minBin = (int)Math.Round(minHz / binWidth);
    int maxBin = (int)Math.Round(maxHz / binWidth);

    // list of accumulated acoustic events
    var events = new List<AcousticEvent>();
    var combinedIntensityArray = new double[frameCount];

    // for all frequency bins except top and bottom
    for (int bin = minBin + 1; bin < maxBin; bin++)
    {
        // set up an intensity array for the frequency bin.
        double[] intensity = new double[frameCount];

        // buffer zone around whistle is four bins wide.
        // NOTE(review): this guard is loop-invariant (tests minBin, not the current bin), so the
        // whole band uses one branch or the other. It prevents reading below bin index 0
        // (bottom side-band reads bin - 5) when the band starts near the bottom of the spectrum.
        // Also, the top side-band reads bin + 5 — presumably maxHz is chosen well below the
        // nyquist so bin + 5 stays within binCount; TODO confirm against callers.
        if (minBin < 4)
        {
            // Band starts too low for a bottom side-band: subtract only the top side-band.
            // for all time frames in this frequency bin
            for (int t = 0; t < frameCount; t++)
            {
                var bandIntensity = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                var topSideBandIntensity = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 3.0;
                intensity[t] = bandIntensity - topSideBandIntensity;
                intensity[t] = Math.Max(0.0, intensity[t]);
            }
        }
        else
        {
            // Subtract both side-bands, each half-weighted (divide by 6) so their sum
            // carries the same total weight as the single-side-band case above.
            // for all time frames in this frequency bin
            for (int t = 0; t < frameCount; t++)
            {
                var bandIntensity = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                var topSideBandIntensity = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 6.0;
                var bottomSideBandIntensity = (sonogramData[t, bin - 3] + sonogramData[t, bin - 4] + sonogramData[t, bin - 5]) / 6.0;
                intensity[t] = bandIntensity - topSideBandIntensity - bottomSideBandIntensity;
                intensity[t] = Math.Max(0.0, intensity[t]);
            }
        }

        // smooth the decibel array to allow for brief gaps.
        intensity = DataTools.filterMovingAverageOdd(intensity, 7);

        //calculate the Hertz bounds of the acoustic events for these freq bins
        int bottomHzBound = (int)Math.Floor(sonogram.FBinWidth * (bin - 1));
        int topHzBound = (int)Math.Ceiling(sonogram.FBinWidth * (bin + 2));

        //extract the events based on length and threshhold.
        // Note: This method does NOT do prior smoothing of the dB array.
        var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
            intensity, bottomHzBound, topHzBound, sonogram.FramesPerSecond, sonogram.FBinWidth, decibelThreshold, minDuration, maxDuration, segmentStartOffset);

        // add to combined intensity array (keep the per-frame maximum over all bins)
        for (int t = 0; t < frameCount; t++)
        {
            //combinedIntensityArray[t] += intensity[t];
            combinedIntensityArray[t] = Math.Max(intensity[t], combinedIntensityArray[t]);
        }

        // combine events
        events.AddRange(acousticEvents);
    } //end for all freq bins

    // combine adjacent acoustic events
    events = AcousticEvent.CombineOverlappingEvents(events, segmentStartOffset);
    return (events, combinedIntensityArray);
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects calls by looking for oscillations (repeated pulses) in the amplitude envelope of a
/// frequency band: the band's row averages are baseline-subtracted, scanned with a sliding DCT,
/// and windows whose oscillation period and intensity pass the configured bounds are scored.
/// Requires a 22050 Hz recording; throws InvalidOperationException otherwise.
/// </summary>
/// <param name="recording">the audio segment to analyse</param>
/// <param name="configuration">recognizer configuration (band limits, oscillation bounds, thresholds)</param>
/// <param name="segmentStartOffset">start time of this segment within the source recording</param>
/// <param name="getSpectralIndexes">lazily computed spectral indices (unused here)</param>
/// <param name="outputDirectory">where the optional debug spectrogram is written</param>
/// <param name="imageWidth">requested image width (unused here)</param>
/// <returns>the recognizer results: sonogram, plots and detected events</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    const int frameSize = 256;
    const double windowOverlap = 0.0;
    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // duration of DCT in seconds
    //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // minimum dB level for a frame to be scored (also used to normalise debug plots)
    double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    // With zero overlap, frames/second is numerically equal to the freq bin width (sr / windowSize).
    double framesPerSecond = freqBinWidth;
    double minPeriod = 1 / (double)maxOscilFreq;
    double maxPeriod = 1 / (double)minOscilFreq;
    double dctDuration = 5 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // remove baseline from amplitude array
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // remove hi freq content from amplitude array
    var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

    // ii: Slide a DCT window over the high-pass signal in 2-frame steps and score oscillations.
    var dctScores = new double[highPassFilteredSignal.Length];
    const int step = 2;
    for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
    {
        // skip quiet frames
        if (highPassFilteredSignal[i] < decibelThreshold)
        {
            continue;
        }

        double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

        // Look for oscillations in the highPassFilteredSignal
        Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
        bool periodWithinBounds = period > minPeriod && period < maxPeriod;
        if (!periodWithinBounds)
        {
            continue;
        }

        if (intensity < dctThreshold)
        {
            continue;
        }

        //lay down score for sample length; keep the maximum intensity seen at each frame,
        //but only where the low-pass signal confirms audible energy.
        for (int j = 0; j < dctLength; j++)
        {
            if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
            {
                dctScores[i + j] = intensity;
            }
        }
    }

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        dctScores, minHz, maxHz, sonogram.FramesPerSecond, freqBinWidth, eventThreshold, minDuration, maxDuration, segmentStartOffset);

    // ######################################################################
    // Stamp segment/species metadata onto every event.
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, dctScores, eventThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
        DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);
        DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { ampltdPlot, lowPassPlot, demeanedPlot, plot };
        Image debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, null) is var _ ? null : null;
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    return (new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = null,
        Plots = plots,
        Events = acousticEvents,
    });
}
} //Analyze()

/// <summary>
/// ################ THE KEY ANALYSIS METHOD
/// Detects calls by cross-correlating the row-averaged amplitude envelopes of a lower and an
/// upper frequency band, one-second step at a time, and scoring windows whose dominant
/// cross-correlation period lies within the configured bounds.
/// </summary>
/// <param name="fiSegmentOfSourceFile">the audio segment to analyse</param>
/// <param name="configDict">algorithm parameters (band limits, thresholds, duration and period bounds)</param>
/// <param name="segmentStartOffset">start time of this segment within the source recording</param>
/// <returns>the sonogram, an (empty) hits matrix, the smoothed intensity scores, the detected events and the recording duration</returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values - ignore those set by user
    int frameSize = 1024;
    double windowOverlap = 0.0;

    int upperBandMinHz = int.Parse(configDict[KeyUpperfreqbandBtm]);
    int upperBandMaxHz = int.Parse(configDict[KeyUpperfreqbandTop]);
    int lowerBandMinHz = int.Parse(configDict[KeyLowerfreqbandBtm]);
    int lowerBandMaxHz = int.Parse(configDict[KeyLowerfreqbandTop]);
    double decibelThreshold = double.Parse(configDict[KeyDecibelThreshold]); //dB (fixed: stray empty statement ";;" removed)
    double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold]); //in 0-1
    double minDuration = double.Parse(configDict[KeyMinDuration]); // seconds
    double maxDuration = double.Parse(configDict[KeyMaxDuration]); // seconds
    double minPeriod = double.Parse(configDict[KeyMinPeriod]); // seconds
    double maxPeriod = double.Parse(configDict[KeyMaxPeriod]); // seconds

    // Note: a C# constructor can never return null, so no null-check is needed here.
    // (The previous "if (recording == null) return null;" branch was unreachable.)
    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        //default values config
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        //NoiseReductionType = SNR.Key2NoiseReductionType("NONE"),
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // With zero overlap, frames/second is numerically equal to the freq bin width (sr / windowSize).
    double framesPerSecond = freqBinWidth;

    //#############################################################################################################################################
    //window    sr      frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
    // 1024     22050   46.4ms          21.5        21.5    2944ms          1376hz      2752hz
    // 1024     17640   58.0ms          17.2        17.2    3715ms          1100hz      2200hz
    // 2048     17640   116.1ms          8.6         8.6    7430ms           551hz      1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, (rowCount - 1), lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, (rowCount - 1), upperBandMaxBin);

    int step = (int)Math.Round(framesPerSecond); //take one second steps
    int stepCount = rowCount / step;
    int sampleLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // Subarray returns a short array at the end of the recording; stop there.
        if ((lowerSubarray.Length != sampleLength) || (upperSubarray.Length != sampleLength))
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 3;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);
        double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
        if ((period < minPeriod) || (period > maxPeriod))
        {
            continue;
        }

        for (int j = 0; j < sampleLength; j++) //lay down score for sample length
        {
            // keep the maximum correlation score seen at each frame
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    //######################################################################
    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity, lowerBandMinHz, upperBandMaxHz, sonogram.FramesPerSecond, freqBinWidth,
        intensityThreshold, minDuration, maxDuration, segmentStartOffset);
    CropEvents(predictedEvents, upperArray);
    var hits = new double[rowCount, colCount];

    return Tuple.Create(sonogram, hits, intensity, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// Entry point for the Syntactic Pattern Recognition (SPR) recogniser.
/// Reads parameters from an ini file, builds a spectrogram of the source recording, then runs
/// one of three call-specific detection branches (WHIPBIRD, CURLEW, CURRAWONG), each of which
/// marks horizontal/vertical line segments (MarkLine) and converts scores to acoustic events.
/// Writes an event-count results file and optionally a spectrogram image.
/// NOTE(review): if callName matches none of the three branches, predictedEvents remains null
/// and "predictedEvents.Count" below will throw a NullReferenceException — confirm callName is
/// validated upstream.
/// NOTE(review): "outputDir + opFName" concatenates a DirectoryInfo with a string; presumably
/// the directory string carries a trailing separator — verify, else use Path.Combine.
/// </summary>
/// <param name="arguments">source recording, config file and output directory; Dev() defaults are used when null</param>
public static void Execute(Arguments arguments)
{
    if (arguments == null)
    {
        arguments = Dev();
    }

    LoggedConsole.WriteLine("DATE AND TIME:" + DateTime.Now);
    LoggedConsole.WriteLine("Syntactic Pattern Recognition\n");
    //StringBuilder sb = new StringBuilder("DATE AND TIME:" + DateTime.Now + "\n");
    //sb.Append("SCAN ALL RECORDINGS IN A DIRECTORY USING HTK-RECOGNISER\n");

    Log.Verbosity = 1;

    FileInfo recordingPath = arguments.Source;
    FileInfo iniPath = arguments.Config;
    DirectoryInfo outputDir = arguments.Output;
    string opFName = "SPR-output.txt";
    string opPath = outputDir + opFName;
    Log.WriteIfVerbose("# Output folder =" + outputDir);

    // A: READ PARAMETER VALUES FROM INI FILE
    var config = new ConfigDictionary(iniPath);
    Dictionary<string, string> dict = config.GetTable();
    Dictionary<string, string>.KeyCollection keys = dict.Keys;

    string callName = dict[key_CALL_NAME];
    double frameOverlap = Convert.ToDouble(dict[key_FRAME_OVERLAP]);

    //SPT PARAMETERS
    double intensityThreshold = Convert.ToDouble(dict[key_SPT_INTENSITY_THRESHOLD]);
    int smallLengthThreshold = Convert.ToInt32(dict[key_SPT_SMALL_LENGTH_THRESHOLD]);

    //WHIPBIRD PARAMETERS
    int whistle_MinHz = int.Parse(dict[key_WHISTLE_MIN_HZ]);
    int whistle_MaxHz = int.Parse(dict[key_WHISTLE_MAX_HZ]);
    double optimumWhistleDuration = double.Parse(dict[key_WHISTLE_DURATION]); //optimum duration of whistle in seconds
    int whip_MinHz = (dict.ContainsKey(key_WHIP_MIN_HZ)) ? int.Parse(dict[key_WHIP_MIN_HZ]) : 0;
    int whip_MaxHz = (dict.ContainsKey(key_WHIP_MAX_HZ)) ? int.Parse(dict[key_WHIP_MAX_HZ]) : 0;
    double whipDuration = (dict.ContainsKey(key_WHIP_DURATION)) ? double.Parse(dict[key_WHIP_DURATION]) : 0.0; //duration of whip in seconds

    //CURLEW PARAMETERS
    double minDuration = (dict.ContainsKey(key_MIN_DURATION)) ? double.Parse(dict[key_MIN_DURATION]) : 0.0; //min duration of call in seconds
    double maxDuration = (dict.ContainsKey(key_MAX_DURATION)) ? double.Parse(dict[key_MAX_DURATION]) : 0.0; //max duration of call in seconds

    double eventThreshold = double.Parse(dict[key_EVENT_THRESHOLD]); //min score for an acceptable event
    int DRAW_SONOGRAMS = Convert.ToInt16(dict[key_DRAW_SONOGRAMS]);

    // B: CHECK to see if conversion from .MP3 to .WAV is necessary
    var destinationAudioFile = recordingPath;

    //LOAD RECORDING AND MAKE SONOGRAM
    BaseSonogram sonogram = null;
    using (var recording = new AudioRecording(destinationAudioFile.FullName))
    {
        // if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
        var sonoConfig = new SonogramConfig
        {
            NoiseReductionType = NoiseReductionType.None,
            //NoiseReductionType = NoiseReductionType.STANDARD,
            WindowOverlap = frameOverlap,
        };

        sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    }

    List<AcousticEvent> predictedEvents = null;
    double[,] hits = null;
    double[] scores = null;

    var audioFileName = Path.GetFileNameWithoutExtension(destinationAudioFile.FullName);

    if (callName.Equals("WHIPBIRD"))
    {
        //SPT: spectral peak tracking pre-processing
        var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);

        //SPR: mark near-horizontal and near-vertical line segments
        Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
        int slope = 0; //degrees of the circle. i.e. 90 = vertical line.
        double sensitivity = 0.7; //lower value = more sensitive
        var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
        slope = 87; //84
        sensitivity = 0.8; //lower value = more sensitive
        var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 4, intensityThreshold + 1, sensitivity);
        Log.WriteLine("SPR finished");
        Log.WriteLine("Extract Whipbird calls - start");

        int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
        int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
        int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
        int minBound_Whip = (int)(whip_MinHz / sonogram.FBinWidth);
        int maxBound_Whip = (int)(whip_MaxHz / sonogram.FBinWidth);
        int whipFrames = (int)(sonogram.FramesPerSecond * whipDuration); //86 = frames/sec.
        var result3 = DetectWhipBird(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, minBound_Whip, maxBound_Whip, whipFrames, smallLengthThreshold);
        scores = result3.Item1;
        hits = DataTools.AddMatrices(mHori, mVert);

        predictedEvents = AcousticEvent.ConvertScoreArray2Events(
            scores, whip_MinHz, whip_MaxHz, sonogram.FramesPerSecond, sonogram.FBinWidth,
            eventThreshold, minDuration, maxDuration, TimeSpan.Zero);
        foreach (AcousticEvent ev in predictedEvents)
        {
            ev.FileName = audioFileName;
            ev.Name = callName;
        }

        sonogram.Data = result1.Item1;
        Log.WriteLine("Extract Whipbird calls - finished");
    }
    else if (callName.Equals("CURLEW"))
    {
        //SPT: curlew branch uses standard noise reduction instead of peak tracking
        double backgroundThreshold = 4.0;
        var result1 = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.Standard, backgroundThreshold);
        //var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);
        //var result1 = doNoiseRemoval(sonogram, intensityThreshold, smallLengthThreshold);

        //SPR: mark rising and falling line segments
        Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
        int slope = 20; //degrees of the circle. i.e. 90 = vertical line.
        double sensitivity = 0.8; //lower value = more sensitive
        var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
        slope = 160;
        sensitivity = 0.8; //lower value = more sensitive
        var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
        Log.WriteLine("SPR finished");

        //detect curlew calls
        int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
        int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
        int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration);
        var result3 = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, smallLengthThreshold);

        //process curlew scores - look for curlew characteristic periodicity
        double minPeriod = 1.2;
        double maxPeriod = 1.8;
        int minPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * minPeriod);
        int maxPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * maxPeriod);
        scores = DataTools.filterMovingAverage(result3.Item1, 21);
        scores = DataTools.PeriodicityDetection(scores, minPeriod_frames, maxPeriod_frames);

        //extract events
        predictedEvents = AcousticEvent.ConvertScoreArray2Events(
            scores, whistle_MinHz, whistle_MaxHz, sonogram.FramesPerSecond, sonogram.FBinWidth,
            eventThreshold, minDuration, maxDuration, TimeSpan.Zero);
        foreach (AcousticEvent ev in predictedEvents)
        {
            ev.FileName = audioFileName;
            ev.Name = callName;
        }

        hits = DataTools.AddMatrices(mHori, mVert);
        sonogram.Data = result1.Item1;
        Log.WriteLine("Extract Curlew calls - finished");
    }
    else if (callName.Equals("CURRAWONG"))
    {
        //SPT: spectral peak tracking pre-processing
        var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);

        //SPR: mark steeply rising and falling line segments
        Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
        int slope = 70; //degrees of the circle. i.e. 90 = vertical line.
        //slope = 210;
        double sensitivity = 0.7; //lower value = more sensitive
        var mHori = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
        slope = 110;
        //slope = 340;
        sensitivity = 0.7; //lower value = more sensitive
        var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
        Log.WriteLine("SPR finished");

        int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
        int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
        int whistleFrames = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
        var result3 = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames + 10, smallLengthThreshold);
        scores = result3.Item1;
        hits = DataTools.AddMatrices(mHori, mVert);

        predictedEvents = AcousticEvent.ConvertIntensityArray2Events(
            scores, TimeSpan.Zero, whistle_MinHz, whistle_MaxHz, sonogram.FramesPerSecond, sonogram.FBinWidth,
            eventThreshold, 0.5, maxDuration);
        foreach (AcousticEvent ev in predictedEvents)
        {
            ev.FileName = audioFileName;
            //ev.Name = callName;
        }
    }

    //write event count to results file.
    double sigDuration = sonogram.Duration.TotalSeconds;
    //string fname = Path.GetFileName(recordingPath);
    int count = predictedEvents.Count; // NOTE(review): throws if callName matched no branch — see summary
    Log.WriteIfVerbose("Number of Events: " + count);
    string str = string.Format("{0}\t{1}\t{2}", callName, sigDuration, count);
    FileTools.WriteTextFile(opPath, AcousticEvent.WriteEvents(predictedEvents, str).ToString());

    // SAVE IMAGE
    // If the target image already exists, rename the old one with the first free numeric suffix.
    string imageName = outputDir + audioFileName;
    string imagePath = imageName + ".png";
    if (File.Exists(imagePath))
    {
        int suffix = 1;
        while (File.Exists(imageName + "." + suffix.ToString() + ".png"))
        {
            suffix++;
        }

        //{
        //    suffix = (suffix == string.Empty) ? "1" : (int.Parse(suffix) + 1).ToString();
        //}
        //File.Delete(outputDir + audioFileName + "." + suffix.ToString() + ".png");
        File.Move(imagePath, imageName + "." + suffix.ToString() + ".png");
    }

    //string newPath = imagePath + suffix + ".png";
    // DRAW_SONOGRAMS: 2 = always draw; 1 = draw only when events were found; 0 = never.
    if (DRAW_SONOGRAMS == 2)
    {
        DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
    }
    else if ((DRAW_SONOGRAMS == 1) && (predictedEvents.Count > 0))
    {
        DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
    }

    Log.WriteIfVerbose("Image saved to: " + imagePath);
    //string savePath = outputDir + Path.GetFileNameWithoutExtension(recordingPath);
    //string suffix = string.Empty;
    //Image im = sonogram.GetImage(false, false);
    //string newPath = savePath + suffix + ".jpg";
    //im.Save(newPath);

    LoggedConsole.WriteLine("\nFINISHED RECORDING!");
    Console.ReadLine();
}
/// <summary>
/// THIS IS THE CORE DETECTION METHOD.
/// Detects the human voice by looking for harmonic stacks (formants) in a 64-bin band above minHz.
/// </summary>
/// <param name="fiSegmentOfSourceFile">audio segment to be analysed.</param>
/// <param name="configDict">string dictionary of analysis parameters (MIN_HZ, MIN/MAX_FORMANT_GAP, thresholds, durations).</param>
/// <param name="segmentStartOffset">start of this segment relative to the whole recording.</param>
/// <returns>tuple of (sonogram, hits matrix, score plot, detected events, recording duration).</returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values
    int frameLength = 1024;
    if (configDict.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"]);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"]);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"]);
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"]); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        //default values config
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    var tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // restrict the search to the 64-bin band starting at minBin
    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1;
    double[] periodicity = results.Item2; //an array of periodicity scores

    //intensity = DataTools.filterMovingAverage(intensity, 3);

    //expect humans to have max power >100 and < 1000 Hz. Set these bounds
    int lowerHumanMaxBound = (int)(100 / freqBinWidth); //ignore 0-100 Hz - too much noise
    int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 3000 Hz (bin index bound used below)

    // NOTE(review): loop bound is rowCount but scoreArray/intensity have length intensity.Length —
    // assumes DetectBarsInTheRowsOfaMatrix returns one score per spectrogram row; TODO confirm.
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < intensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);

        // zero the bins outside the expected human-voice power range before searching for peaks
        for (int j = 0; j < lowerHumanMaxBound; j++)
        {
            spectrum[j] = 0.0;
        }

        for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
        {
            spectrum[j] = 0.0;
        }

        // take the average position of the two strongest peaks as the dominant frequency
        double[] peakvalues = DataTools.GetPeakValues(spectrum);
        int maxIndex1 = DataTools.GetMaxIndex(peakvalues);
        peakvalues[maxIndex1] = 0.0; // remove top peak so second call finds the runner-up
        int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
        int avMaxBin = (maxIndex1 + maxIndex2) / 2;

        //int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
        int freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);

        // discount (zero) the score when the dominant frequency falls outside 500-1000 Hz
        double discount = 1.0;
        if (freqWithMaxPower > 1000)
        {
            discount = 0.0;
        }
        else if (freqWithMaxPower < 500)
        {
            discount = 0.0;
        }

        //set scoreArray[r] - ignore locations with low intensity
        if (intensity[r] > intensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        for (int c = minBin; c < maxbin; c++)
        {
            //hits[r, c] = herzPeriod / (double)380; //divide by 380 to get a relativePeriod;
            hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    //remove isolated speech events - expect humans to talk like politicians
    //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
    Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);
    return (Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
} //Analysis()
/// <summary>
/// ################ THE KEY ANALYSIS METHOD for TRILLS.
/// Detects trill events (oscillations in the lower/upper band difference signal) and then
/// separately detects brief "tink" events with a stronger decibel threshold.
///
/// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
/// </summary>
/// <param name="recording">the audio segment; may be null, in which case analysis is aborted.</param>
/// <param name="sonoConfig">spectrogram configuration used for the analysis sonogram.</param>
/// <param name="lwConfig">species-specific parameters (bands, thresholds, periods, durations, profile names).</param>
/// <param name="returnDebugImage">when true, a debug image with score plots is rendered.</param>
/// <param name="segmentStartOffset">start of this segment relative to the whole recording.</param>
/// <returns>tuple of (display sonogram, hits matrix (always null here), score array, confirmed events, debug image or null).</returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaWatjulumConfig lwConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    double intensityThreshold = lwConfig.IntensityThreshold;
    double minDuration = lwConfig.MinDurationOfTrill; // seconds
    double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
    double minPeriod = lwConfig.MinPeriod; // seconds
    double maxPeriod = lwConfig.MaxPeriod; // seconds

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return (null);
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): frames/sec equals Hz/bin (both = sr/windowSize) only when window overlap is zero —
    // confirm sonoConfig.WindowOverlap == 0 for the configs passed in here.
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 4 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    //int colCount = sonogram.Data.GetLength(1);

    // per-frame average amplitude in the lower and upper frequency bands
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);

    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

    // Could smooth here rather than above. Above seemed slightly better?
    //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
    var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        lwConfig.DecibelThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // zero out small difference scores before the oscillation scan
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // LOOK FOR TRILL EVENTS
    // init the score array
    double[] scores = new double[rowCount];

    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // confirm each candidate trill event by scanning it for an oscillation of acceptable period
    foreach (var ae in predictedTrillEvents)
    {
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        // NOTE(review): i starts at eventStart - dctLength/2, which can be negative for events near
        // the segment start; scores[i + j] would then throw — confirm callers cannot produce such events.
        for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
        {
            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            double oscilFreq;
            double period;
            double intensity;
            Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);
            bool periodWithinBounds = period > minPeriod && period < maxPeriod;

            //Console.WriteLine($"step={i}    period={period:f4}");

            if (!periodWithinBounds)
            {
                continue;
            }

            for (int j = 0; j < dctLength; j++) //lay down score for sample length
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
            ae.Score_MaxInEvent = maximumIntensity;
            ae.Profile = lwConfig.ProfileNames[0];
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################
    // LOOK FOR TINK EVENTS
    // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
    double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

    // want stronger threshold for tink because brief.
    double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
    var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        tinkDecibelThreshold,
        minDurationOfTink,
        maxDurationOfTink,
        segmentStartOffset);

    // tink events are accepted without further confirmation; only naming/profile info is added
    foreach (var ae2 in predictedTinkEvents)
    {
        // Prune the list of potential acoustic events, for example using Cosine Similarity.
        //rowtop, rowWidth
        //int eventStart = ae2.Oblong.RowTop;
        //int eventWidth = ae2.Oblong.RowWidth;
        //int step = 2;
        //double maximumIntensity = 0.0;

        // add abbreviatedSpeciesName into event
        //if (maximumIntensity >= intensityThreshold)
        //{
        ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

        //ae2.Score_MaxInEvent = maximumIntensity;
        ae2.Profile = lwConfig.ProfileNames[1];
        confirmedEvents.Add(ae2);

        //}
    }

    //######################################################################

    var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        double[] normalisedScores;
        double normalisedThreshold;
        DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
        debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for more easy interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return (Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
} //Analysis()
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Detects Litoria bicolor calls: candidate events are found from the lower/upper band
/// sum-minus-difference signal, then confirmed by scanning for an oscillation of acceptable period.
/// </summary>
/// <param name="recording">the audio segment; may be null, in which case analysis is aborted.</param>
/// <param name="sonoConfig">spectrogram configuration used for the analysis sonogram.</param>
/// <param name="lbConfig">species-specific parameters (bands, thresholds, periods, durations).</param>
/// <param name="drawDebugImage">when true, a debug image with score plots is rendered.</param>
/// <param name="segmentStartOffset">start of this segment relative to the whole recording.</param>
/// <returns>tuple of (display sonogram, hits matrix (always null here), score array, confirmed events, debug image or null).</returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaBicolorConfig lbConfig,
    bool drawDebugImage,
    TimeSpan segmentStartOffset)
{
    double decibelThreshold = lbConfig.DecibelThreshold; //dB
    double intensityThreshold = lbConfig.IntensityThreshold;

    //double eventThreshold = lbConfig.EventThreshold; //in 0-1

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return (null);
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): frames/sec equals Hz/bin (both = sr/windowSize) only when window overlap is zero —
    // confirm sonoConfig.WindowOverlap == 0 for the configs passed in here.
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 3 * lbConfig.MaxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // per-frame average amplitude in the lower and upper frequency bands
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);

    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

    // Could smooth here rather than above. Above seemed slightly better?
    amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lbConfig.LowerBandMinHz,
        lbConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        lbConfig.MinDuration,
        lbConfig.MaxDuration,
        segmentStartOffset);

    // zero out small difference scores before the oscillation scan
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // init the score array
    double[] scores = new double[rowCount];

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // confirm each candidate event by scanning it for an oscillation of acceptable period
    foreach (var ae in predictedEvents)
    {
        //rowtop, rowWidth
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        // NOTE(review): i starts at eventStart - dctLength/2, which can be negative for events near
        // the segment start; scores[i + j] would then throw — confirm callers cannot produce such events.
        for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
        {
            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
            bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;

            //Console.WriteLine($"step={i}    period={period:f4}");

            if (!periodWithinBounds)
            {
                continue;
            }

            // lay down score for sample length
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = "L.b";
            ae.Score_MaxInEvent = maximumIntensity;
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################

    // calculate the cosine similarity scores
    var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

    //DEBUG IMAGE this recognizer only. MUST set false for deployment.
    Image debugImage = null;
    if (drawDebugImage)
    {
        // display a variety of debug score arrays
        //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
        //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
        //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
        DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };

        // other debug plots
        //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
        debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for more easy interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return (Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
} //Analysis()
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects Litoria nasuta by converting the average amplitude of the configured frequency band
/// directly into acoustic events (no oscillation confirmation step).
/// </summary>
/// <param name="recording">one minute of audio recording.</param>
/// <param name="configuration">config file providing the species parameters.</param>
/// <param name="segmentStartOffset">start of this segment relative to the whole recording.</param>
/// <param name="getSpectralIndexes">lazy spectral indices (not used by this recognizer).</param>
/// <param name="outputDirectory">where the debug spectrogram is written.</param>
/// <param name="imageWidth">not used by this recognizer.</param>
/// <returns>recognizer results containing the sonogram, plots and detected events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaNasutaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // BETTER TO SET THESE. IGNORE USER!
    // this default framesize seems to work
    const int frameSize = 1024;
    const double windowOverlap = 0.0;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // use the default HAMMING window
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.None
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 3.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // per-frame average amplitude within the species' frequency band
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // convert the amplitude track directly into candidate events
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    double[,] hits = null;

    // stamp species/segment metadata onto every event
    // (fix: removed an unused `prunedEvents` list that was never populated or returned)
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = recognizerConfig.SpeciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
    });

    // keep only above-threshold amplitudes for the display plot
    var thresholdedPlot = new double[amplitudeArray.Length];
    for (int x = 0; x < amplitudeArray.Length; x++)
    {
        if (amplitudeArray[x] > decibelThreshold)
        {
            thresholdedPlot[x] = amplitudeArray[x];
        }
    }

    var maxDb = amplitudeArray.MaxOrDefault();

    DataTools.Normalise(thresholdedPlot, decibelThreshold, out var normalisedScores, out var normalisedThreshold);

    // fix: the interpolated string was redundantly wrapped in string.Format()
    var text = $"{this.DisplayName} (Fullscale={maxDb:f1}dB)";
    var plot = new Plot(text, normalisedScores, normalisedThreshold);

    // Debug image is always rendered for this recognizer (was wrapped in a redundant `if (true)`).
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { plot, amplPlot };

        // NOTE: This DrawDebugImage() method can be over-written in this class.
        var debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, hits);
        var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
        debugImage.Save(debugPath);
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = acousticEvents,
    };
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Detects the White Heron call by finding amplitude peaks in the configured band and
/// matching the localities around them against start/end call templates (cosine similarity).
/// </summary>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    // common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // BETTER TO CALCULATE THIS. IGNORE USER!
    // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

    // duration of DCT in seconds
    //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

    // minimum acceptable value of a DCT coefficient
    //double dctThreshold = (double)configuration[AnalysisKeys.DctThreshold];
    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
    double decibelThreshold = configuration.GetDouble("DecibelThreshold");

    //double minPeriod = (double)configuration["MinPeriod"]; //: 0.18
    //double maxPeriod = (double)configuration["MaxPeriod"]; //

    //int maxOscilRate = (int)Math.Ceiling(1 /minPeriod);
    //int minOscilRate = (int)Math.Floor(1 /maxPeriod);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in second
    var maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // min score for an acceptable event
    var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    // this default framesize and overlap is best for the White Heron of Bhutan.
    const int frameSize = 2048;
    double windowOverlap = 0.0;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    /* #############################################################################################################################################
     * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
     * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
     * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
     * 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz
     * 2048     22050        92.8ms         21.5        10.7666 1472ms
     */

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // var templates = GetTemplatesForAlgorithm1(14);
    // per-frame average amplitude within the species' frequency band
    var amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    bool[] peakArray = new bool[rowCount];
    var amplitudeScores = new double[rowCount];
    var hits = new double[rowCount, colCount];

    // template-matching parameters (frame counts)
    const int maxTemplateLength = 20;
    const int templateEndPadding = 7;
    const int templateOffset = 14;
    const int minimumGap = 4;
    const int maximumGap = 100;

    // first find the amplitude peaks (local maxima above the decibel threshold)
    for (int j = 1; j < amplitudeArray.Length - 1; j++)
    {
        if (amplitudeArray[j] < decibelThreshold)
        {
            continue;
        }

        if (amplitudeArray[j] > amplitudeArray[j - 1] && amplitudeArray[j] > amplitudeArray[j + 1])
        {
            peakArray[j] = true;
        }
    }

    // get template for end of Heron call
    var endTemplate = GetEndTemplateForAlgorithm2();

    // now search for peaks that are the correct distance apart.
    for (int i = 2; i < amplitudeArray.Length - maxTemplateLength - templateEndPadding; i++)
    {
        if (!peakArray[i])
        {
            continue;
        }

        // calculate distance to next peak
        int distanceToNextPeak = CalculateDistanceToNextPeak(peakArray, i);

        // skip gaps that are too small or too large
        if (distanceToNextPeak < minimumGap || distanceToNextPeak > maximumGap)
        {
            continue;
        }

        // The heron call ends with a rising whip
        // Check end of call using end template
        if (distanceToNextPeak > maxTemplateLength)
        {
            // a large gap means this peak can only be the end of a call; match the end template
            int start = i - templateOffset;
            if (start < 0)
            {
                start = 0;
            }

            var endLocality = DataTools.Subarray(amplitudeArray, start, endTemplate.Length);
            double endScore = DataTools.CosineSimilarity(endLocality, endTemplate);

            // lay down the end score over the locality, keeping the maximum at each frame
            for (int to = -templateOffset; to < endTemplate.Length - templateOffset; to++)
            {
                if (i + to >= 0 && endScore > amplitudeScores[i + to])
                {
                    amplitudeScores[i + to] = endScore;

                    // hits[i, minBin] = 10;
                }
            }

            // zero the scores after the call end so a new event can begin cleanly
            for (int k = 2; k < maxTemplateLength; k++)
            {
                amplitudeScores[i + k] = 0.0;
            }

            continue;
        }

        // Get the start template which depends on distance to next peak.
        var startTemplate = GetTemplateForAlgorithm2(distanceToNextPeak, templateEndPadding);

        // now calculate similarity of locality with the startTemplate
        var locality = DataTools.Subarray(amplitudeArray, i - 2, startTemplate.Length); // i-2 because first two places should be zero.
        double score = DataTools.CosineSimilarity(locality, startTemplate);

        // lay down the start score over the template span, keeping the maximum at each frame
        for (int t = 0; t < startTemplate.Length; t++)
        {
            if (score > amplitudeScores[i + t])
            {
                amplitudeScores[i + t] = score;
                hits[i, minBin] = 10;
            }
        }
    } // loop over peak array

    var smoothedScores = DataTools.filterMovingAverageOdd(amplitudeScores, 3);

    // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        smoothedScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // drop events shorter than minDuration and stamp metadata onto the rest
    var prunedEvents = new List<AcousticEvent>();
    foreach (var ae in predictedEvents)
    {
        if (ae.EventDurationSeconds < minDuration)
        {
            continue;
        }

        // add additional info
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // do a recognizer test.
    //CompareArrayWithBenchmark(scores, new FileInfo(recording.FilePath));
    //CompareArrayWithBenchmark(prunedEvents, new FileInfo(recording.FilePath));

    // NOTE(review): the plot shows the unsmoothed amplitudeScores while events come from
    // smoothedScores — presumably intentional for display fidelity; confirm.
    var plot = new Plot(this.DisplayName, amplitudeScores, eventThreshold);
    return (new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = prunedEvents,
    });
}