/// <summary>
/// Detects harmonics in a sub-band of a spectrogram.
/// The matrix is assumed to be derived from a spectrogram rotated so that each matrix row is one spectral column (time frame) of the spectrogram.
/// Developed for the GenericRecognizer of harmonics.
/// WARNING: As of March 2020, every analysed frame is the average of five adjacent frames (t-2 to t+2). This is to reduce noise,
/// but it requires that the frequency of any potential formants is not changing rapidly.
/// This may not be suitable for detecting human speech. However, the frame step can be reduced to compensate.
/// Because of the five-frame window, the first two and the last two frames are never analysed and keep the value zero in all three returned arrays.
/// </summary>
/// <param name="m">spectrogram data matrix; one row per time frame.</param>
/// <param name="dBThreshold">Minimum sound level. A frame whose averaged maximum is below this value gets a dB value but no intensity or index.</param>
/// <returns>three arrays: dBArray (maximum of each averaged frame), intensity (largest DCT coefficient of the frame) and maxIndexArray (index of that coefficient).</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
{
    const int lowerDctBound = 2;
    const int halfWindow = 2;

    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    // cosine coefficients for a DCT that spans every bin of a frame
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // per-frame results: decibels, formant intensity and index of the maximum DCT coefficient
    var dBArray = new double[frameCount];
    var intensity = new double[frameCount];
    var maxIndexArray = new int[frameCount];

    for (int t = halfWindow; t < frameCount - halfWindow; t++)
    {
        // the five frames centred on t
        var window = new[]
        {
            MatrixTools.GetRow(m, t - 2),
            MatrixTools.GetRow(m, t - 1),
            MatrixTools.GetRow(m, t),
            MatrixTools.GetRow(m, t + 1),
            MatrixTools.GetRow(m, t + 2),
        };

        // average the five frames bin by bin
        var averaged = new double[binCount];
        for (int bin = 0; bin < binCount; bin++)
        {
            double sum = window[0][bin];
            for (int k = 1; k < window.Length; k++)
            {
                sum += window[k][bin];
            }

            averaged[bin] = sum / 5;
        }

        double peak = averaged.Max();
        dBArray[t] = peak;
        if (peak < dBThreshold)
        {
            // too quiet: leave intensity and index at zero
            continue;
        }

        // xr has twice length of frame and is symmetrical. Require only first half.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(averaged);

        // Would normally normalise the xcorr values for overlap count (xr[i] / (binCount - i)).
        // But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values.
        // So the first half is copied unchanged.
        var firstHalf = new double[binCount];
        Array.Copy(xr, firstHalf, binCount);

        // now do DCT across the auto cross xr
        var dctCoefficients = Oscillations2012.DoDct(firstHalf, cosines, lowerDctBound);
        int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
        intensity[t] = dctCoefficients[indexOfMaxValue];
        maxIndexArray[t] = indexOfMaxValue;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
/// <summary>
/// Detects harmonics in a sub-band of a sonogram, one frame at a time.
/// The matrix is assumed to be derived from a spectrogram rotated so that each matrix row is one spectral column (time frame) of the sonogram.
/// Developed for the GenericRecognizer of harmonics.
/// </summary>
/// <param name="m">data matrix; one row per time frame.</param>
/// <param name="dBThreshold">Minimum sound level. A frame whose maximum is below this value gets a dB value but no intensity or index.</param>
/// <returns>three arrays: dBArray (maximum of each frame), intensity (largest DCT coefficient of the frame) and maxIndexArray (index of that coefficient).</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
    const int lowerDctBound = 2;

    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    var dBArray = new double[frameCount];

    // formant intensity per frame
    var intensity = new double[frameCount];

    // index of the maximum DCT coefficient per frame
    var maxIndexArray = new int[frameCount];

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // frames = rows of matrix
    for (int t = 0; t < frameCount; t++)
    {
        var frame = MatrixTools.GetRow(m, t);
        double loudest = frame.Max();
        dBArray[t] = loudest;
        if (loudest < dBThreshold)
        {
            continue;
        }

        // xr has twice length of frame and is symmetrical. Require only first half.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);

        // normalise each lag by the number of overlapping values that contributed to it
        var normalised = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            int overlap = binCount - lag;
            normalised[lag] = xr[lag] / overlap;
        }

        // now do DCT across the auto cross xr
        var dctCoefficients = Oscillations2012.DoDct(normalised, cosines, lowerDctBound);
        int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
        intensity[t] = dctCoefficients[indexOfMaxValue];
        maxIndexArray[t] = indexOfMaxValue;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
/// <summary>
/// ################ THE KEY ANALYSIS METHOD for TRILLS
///
/// Builds a spectrogram, derives a sum-minus-difference amplitude score from the lower and upper frequency bands,
/// converts that score into candidate trill events and confirms a candidate when a DCT over the baseline-removed score
/// finds an oscillation whose period lies between the configured minimum and maximum and whose intensity reaches the intensity threshold.
/// Candidate "tink" events (decibel threshold raised by 3 dB) are all accepted without further checks.
///
/// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
/// </summary>
/// <param name="recording">The audio recording to analyse. When null a message is logged and null is returned.</param>
/// <param name="sonoConfig">Configuration of the spectrogram used for the analysis.</param>
/// <param name="lwConfig">Recognizer configuration: thresholds, band limits, trill/tink durations, periods and profile names.</param>
/// <param name="returnDebugImage">When true a debug image with the score plots is drawn and returned.</param>
/// <param name="segmentStartOffset">Start offset of this segment; passed on to the event conversion.</param>
/// <returns>
/// A tuple of: a freshly made display spectrogram (window 512, no overlap, STANDARD noise reduction), the hits matrix (always null),
/// the per-frame oscillation scores, the confirmed events, and the debug image (null unless requested).
/// </returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaWatjulumConfig lwConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    double intensityThreshold = lwConfig.IntensityThreshold;
    double minDuration = lwConfig.MinDurationOfTrill; // seconds
    double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
    double minPeriod = lwConfig.MinPeriod; // seconds
    double maxPeriod = lwConfig.MaxPeriod; // seconds

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): the frame rate equals the bin width only when the window overlap is zero, whereas the event
    // conversion below uses sonogram.FramesPerSecond - confirm that sonoConfig.WindowOverlap is always 0 here.
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 4 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    //int colCount = sonogram.Data.GetLength(1);

    // average decibel value per frame in each of the two bands
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);
    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

    // Could smooth here rather than above. Above seemed slightly better?
    //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
    var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        lwConfig.DecibelThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // suppress small difference values before looking for oscillations
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // LOOK FOR TRILL EVENTS
    // init the score array
    double[] scores = new double[rowCount];

    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // add names into the returned events
    foreach (var ae in predictedTrillEvents)
    {
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        int halfDct = dctLength / 2;
        for (int i = eventStart - halfDct; i < eventStart + eventWidth - halfDct; i += step)
        {
            // The scan window [i, i + dctLength) is centred on the event, so for events close to the start or end
            // of the recording it extends beyond the score arrays. Such windows cannot be analysed and indexing
            // scores[i + j] for them would be out of range, so skip them. Skipping (rather than clamping the
            // start) keeps the positions of all valid windows unchanged.
            if (i < 0 || i + dctLength > scores.Length || i + dctLength > differenceScores.Length)
            {
                continue;
            }

            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            double oscilFreq;
            double period;
            double intensity;
            Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

            bool periodWithinBounds = period > minPeriod && period < maxPeriod;

            //Console.WriteLine($"step={i} period={period:f4}");
            if (!periodWithinBounds)
            {
                continue;
            }

            // lay down score for sample length, keeping the largest intensity seen for each frame
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
            ae.Score_MaxInEvent = maximumIntensity;
            ae.Profile = lwConfig.ProfileNames[0];
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################
    // LOOK FOR TINK EVENTS
    // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
    double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

    // want stronger threshold for tink because brief.
    double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
    var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        tinkDecibelThreshold,
        minDurationOfTink,
        maxDurationOfTink,
        segmentStartOffset);

    foreach (var ae2 in predictedTinkEvents)
    {
        // Prune the list of potential acoustic events, for example using Cosine Similarity.
        // No pruning is implemented yet: every predicted tink event is accepted.

        //rowtop, rowWidth
        //int eventStart = ae2.Oblong.RowTop;
        //int eventWidth = ae2.Oblong.RowWidth;
        //int step = 2;
        //double maximumIntensity = 0.0;

        // add abbreviatedSpeciesName into event
        //if (maximumIntensity >= intensityThreshold)
        //{
        ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

        //ae2.Score_MaxInEvent = maximumIntensity;
        ae2.Profile = lwConfig.ProfileNames[1];
        confirmedEvents.Add(ae2);

        //}
    }

    //######################################################################
    var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        double[] normalisedScores;
        double normalisedThreshold;
        DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
        debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for more easy interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
/// <summary>
/// Scores frames of a spectrogram by their similarity to a target template.
/// The target is converted to cepstral coefficients and flattened to a unit-length vector. Within each given segment a window
/// of the target's length is moved one frame at a time over the band between <paramref name="minHz"/> and <paramref name="maxHz"/>,
/// converted the same way, and the dot product of the two vectors is stored as the score of the window's first frame.
/// Side effect: an image of the target's cepstral matrix is written to a hard-coded path.
/// </summary>
/// <param name="target">The template matrix.</param>
/// <param name="dynamicRange">Dynamic range applied to the target and to every compared window.</param>
/// <param name="sonogram">The spectrogram to search.</param>
/// <param name="segments">The segments to search. When null the method returns null.</param>
/// <param name="minHz">Lower frequency bound of the search band.</param>
/// <param name="maxHz">Upper frequency bound of the search band.</param>
/// <param name="minDuration">Not used by this method.</param>
/// <returns>A one-element tuple holding the score array (one value per spectrogram frame), or null when there are no segments.</returns>
public static Tuple<double[]> Execute_MFCC_XCOR(double[,] target, double dynamicRange, SpectrogramStandard sonogram, List<AcousticEvent> segments, int minHz, int maxHz, double minDuration)
{
    Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
    if (segments == null)
    {
        return null;
    }

    // frequency band of interest expressed in bins
    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);

    int targetLength = target.GetLength(0);

    // only use first 12 coefficients
    int coeffCount = 12;

    // number of filters in filter bank
    int binCount = target.GetLength(1);

    // set up the matrix of cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(binCount, coeffCount + 1);

    // adjust target's dynamic range to that set by user, then convert it to a unit-length cepstral vector
    target = SNR.SetDynamicRange(target, 3.0, dynamicRange);
    target = MFCCStuff.Cepstra(target, coeffCount, cosines);
    double[] targetVector = DataTools.normalise2UnitLength(DataTools.Matrix2Array(target));

    // NOTE(review): debug output written to a hard-coded absolute developer path on every call -
    // confirm this is still wanted before relying on this method outside that machine.
    string imagePath2 = @"C:\SensorNetworks\Output\FELT_Currawong\target.png";
    var image = BaseSonogram.Data2ImageData(target).Item1;
    ImageTools.DrawMatrix(image, 1, 1, imagePath2);

    int frameCount = sonogram.FrameCount;
    var scores = new double[frameCount];
    foreach (AcousticEvent segment in segments)
    {
        Log.WriteLine("SEARCHING SEGMENT.");
        int startRow = (int)Math.Round(segment.TimeStart * sonogram.FramesPerSecond);
        int endRow = (int)Math.Round(segment.TimeEnd * sonogram.FramesPerSecond);

        // keep the last row inside the spectrogram, leave room for the target, but always want minimum of one row
        endRow = Math.Min(endRow, sonogram.FrameCount - 1) - targetLength;
        endRow = Math.Max(endRow, startRow + 1);

        for (int r = startRow; r < endRow; r++)
        {
            // extract the window starting at row r and give it the same treatment as the target
            double[,] window = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
            window = SNR.SetDynamicRange(window, 3.0, dynamicRange);

            //string imagePath2 = @"C:\SensorNetworks\Output\FELT_Gecko\compare.png";
            //var image = BaseSonogram.Data2ImageData(matrix);
            //ImageTools.DrawMatrix(image, 1, 1, imagePath2);
            window = MFCCStuff.Cepstra(window, coeffCount, cosines);
            double[] windowVector = DataTools.normalise2UnitLength(DataTools.Matrix2Array(window));

            // both vectors have unit length, so the dot product is the cross-correlation score
            scores[r] = DataTools.DotProduct(targetVector, windowVector);
        }
    }

    return Tuple.Create(scores);
}
/// <summary>
/// THIS METHOD NO LONGER IN USE.
/// NOT USEFUL FOR ANIMAL CALLS.
/// Tried this but it is suitable only when there is guarantee of numerous spectral tracks as in the vowels of human speech.
/// It yields SPURIOUS RESULTS where there is only one whistle track.
///
/// For every second row it sums five consecutive rows over the band minBin..maxBin, removes the mean, takes a DCT across the band
/// and, when the strongest coefficient lies in the permitted index range and exceeds the threshold, writes the corresponding
/// period into that row and the next row of the hits matrix.
/// </summary>
/// <param name="matrix">The data matrix; indexed [row, bin].</param>
/// <param name="minBin">First bin of the band.</param>
/// <param name="maxBin">Last bin of the band (inclusive).</param>
/// <param name="hzWidth">Width of the band in Hertz.</param>
/// <param name="normaliseDCT">When true the DCT coefficients are normalised to unit length before the threshold test.</param>
/// <param name="minPeriod">Minimum accepted period; determines the largest accepted DCT index.</param>
/// <param name="maxPeriod">Maximum accepted period; determines the smallest accepted DCT index.</param>
/// <param name="dctThreshold">The strongest DCT coefficient must exceed this value.</param>
/// <returns>A matrix the size of the input holding the detected period at accepted locations and zero elsewhere.</returns>
public static double[,] DetectHarmonicsUsingDCT(double[,] matrix, int minBin, int maxBin, int hzWidth, bool normaliseDCT, int minPeriod, int maxPeriod, double dctThreshold)
{
    const int rowsToSum = 5;
    const int lowCoefficientsToDrop = 5;

    // DCT spans N freq bins
    int dctLength = maxBin - minBin + 1;

    // Times 2 because index = Pi and not 2Pi
    // NOTE(review): the original comments on these two lines said "Times 0.5" although the code multiplies by 2.
    int minIndex = (int)(hzWidth / (double)maxPeriod * 2);
    int maxIndex = (int)(hzWidth / (double)minPeriod * 2);

    // safety check in case of future changes to code.
    maxIndex = Math.Min(maxIndex, dctLength);

    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);
    var hits = new double[rows, cols];

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    // do alternate rows
    // NOTE(review): the row loop is bounded by dctLength, which is a count of frequency bins, while only
    // five rows are read ahead - confirm whether the bound was meant to be rows - 5.
    for (int r = 0; r < rows - dctLength; r += 2)
    {
        // accumulate five rows of values for every bin in the band
        var band = new double[dctLength];
        for (int bin = 0; bin < dctLength; bin++)
        {
            double sum = 0.0;
            for (int j = 0; j < rowsToSum; j++)
            {
                sum += matrix[r + j, minBin + bin];
            }

            band[bin] = sum;
        }

        band = DataTools.SubtractMean(band);

        // DataTools.writeBarGraph(array);
        double[] dct = MFCCStuff.DCT(band, cosines);

        // convert to absolute values
        for (int k = 0; k < dctLength; k++)
        {
            dct[k] = Math.Abs(dct[k]);
        }

        // remove low freq values from consideration
        Array.Clear(dct, 0, lowCoefficientsToDrop);

        if (normaliseDCT)
        {
            dct = DataTools.normalise2UnitLength(dct);
        }

        int indexOfMaxValue = DataTools.GetMaxIndex(dct);

        //DataTools.writeBarGraph(dct);

        // mark DCT location with harmonic freq, only if harmonic freq is in correct range and amplitude
        if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold)
        {
            // Times 2 because index = Pi and not 2Pi
            double period = hzWidth / (double)indexOfMaxValue * 2;

            // this row and the alternate row that the loop skips
            for (int bin = 0; bin < dctLength; bin++)
            {
                hits[r, minBin + bin] = period;
                hits[r + 1, minBin + bin] = period;
            }
        }
    }

    return hits;
}
/// <summary>
/// Looks for oscillations in an array of decibel values using a DCT.
/// A DCT window is started at every frame that is a local peak and is not below the decibel threshold. When the strongest
/// (unit-length normalised) coefficient lies within the requested oscillation range and exceeds the DCT threshold,
/// its value and oscillation frequency are written to every frame of the window unless a larger score is already there.
/// Currently this method is called by only one species recognizer - LitoriaCaerulea.
/// </summary>
/// <param name="ipArray">an array of decibel values.</param>
/// <param name="framesPerSecond">the frame rate.</param>
/// <param name="decibelThreshold">Ignore frames below this threshold.</param>
/// <param name="dctDuration">Duration in seconds of the required DCT.</param>
/// <param name="minOscFreq">minimum oscillation frequency.</param>
/// <param name="maxOscFreq">maximum oscillation frequency.</param>
/// <param name="dctThreshold">Threshold for the maximum DCT coefficient.</param>
/// <param name="dctScores">an array of dct scores, same length as the input.</param>
/// <param name="oscFreq">an array of oscillation frequencies, same length as the input.</param>
public static void DetectOscillations(
    double[] ipArray,
    double framesPerSecond,
    double decibelThreshold,
    double dctDuration,
    double minOscFreq,
    double maxOscFreq,
    double dctThreshold,
    out double[] dctScores,
    out double[] oscFreq)
{
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // multiply by 2 because index = Pi and not 2Pi
    int minIndex = (int)(minOscFreq * dctDuration * 2);
    int maxIndex = (int)(maxOscFreq * dctDuration * 2);
    if (maxIndex > dctLength)
    {
        LoggedConsole.WriteWarnLine("MaxIndex > DCT length. Therefore set maxIndex = DCT length.");
        maxIndex = dctLength;
    }

    int frameCount = ipArray.Length;
    dctScores = new double[frameCount];
    oscFreq = new double[frameCount];

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    //following two lines write bmp image of cosine matrix values for checking.
    //string bmpPath = @"C:\SensorNetworks\Output\cosines.png";
    //ImageTools.DrawMatrix(cosines, bmpPath, true);

    // coefficients below this index are treated as low freq oscillations and removed
    int lowFreqCutoff = minIndex / 4;

    for (int r = 1; r < frameCount - dctLength; r++)
    {
        double current = ipArray[r];

        // only stop if current location is a peak ...
        bool belowNeighbour = current < ipArray[r - 1] || current < ipArray[r + 1];

        // ... and is loud enough
        if (belowNeighbour || current < decibelThreshold)
        {
            continue;
        }

        // extract array and ready for DCT
        var window = DataTools.Subarray(ipArray, r, dctLength);
        window = DataTools.SubtractMean(window);
        double[] coefficients = MFCCStuff.DCT(window, cosines);

        // convert to absolute values because not interested in negative values due to phase.
        for (int k = 0; k < dctLength; k++)
        {
            coefficients[k] = Math.Abs(coefficients[k]);
        }

        // remove low freq oscillations from consideration
        for (int k = 0; k < lowFreqCutoff; k++)
        {
            coefficients[k] = 0.0;
        }

        coefficients = DataTools.normalise2UnitLength(coefficients);
        int indexOfMaxValue = DataTools.GetMaxIndex(coefficients);

        // mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
        if (indexOfMaxValue < minIndex || indexOfMaxValue > maxIndex)
        {
            continue;
        }

        double strongest = coefficients[indexOfMaxValue];
        if (!(strongest > dctThreshold))
        {
            continue;
        }

        // divide by 2 because index = Pi and not 2Pi
        double frequency = indexOfMaxValue / dctDuration / 2;
        for (int offset = 0; offset < dctLength; offset++)
        {
            int frame = r + offset;
            if (dctScores[frame] < strongest)
            {
                dctScores[frame] = strongest;
                oscFreq[frame] = frequency;
            }
        }
    }
}
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Builds a spectrogram, derives a sum-minus-difference amplitude score from the lower and upper frequency bands,
/// converts the smoothed score into candidate events and confirms a candidate when a DCT over the pre-emphasised, smoothed
/// score finds an oscillation whose period lies between the configured minimum and maximum and whose intensity reaches the intensity threshold.
/// </summary>
/// <param name="recording">The audio recording to analyse. When null a message is logged and null is returned.</param>
/// <param name="sonoConfig">Configuration of the spectrogram used for the analysis.</param>
/// <param name="lbConfig">Recognizer configuration: thresholds, band limits, durations and periods.</param>
/// <param name="drawDebugImage">When true a debug image with the score plots is drawn and returned.</param>
/// <param name="segmentStartOffset">Start offset of this segment; passed on to the event conversion.</param>
/// <returns>
/// A tuple of: a freshly made display spectrogram (window 512, no overlap, STANDARD noise reduction), the hits matrix (always null),
/// the per-frame oscillation scores, the confirmed events, and the debug image (null unless requested).
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaBicolorConfig lbConfig,
    bool drawDebugImage,
    TimeSpan segmentStartOffset)
{
    double decibelThreshold = lbConfig.DecibelThreshold; //dB
    double intensityThreshold = lbConfig.IntensityThreshold;

    //double eventThreshold = lbConfig.EventThreshold; //in 0-1

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): the frame rate equals the bin width only when the window overlap is zero, whereas the event
    // conversion below uses sonogram.FramesPerSecond - confirm that sonoConfig.WindowOverlap is always 0 here.
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 3 * lbConfig.MaxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    // average decibel value per frame in each of the two bands
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);
    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

    // Could smooth here rather than above. Above seemed slightly better?
    amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lbConfig.LowerBandMinHz,
        lbConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        lbConfig.MinDuration,
        lbConfig.MaxDuration,
        segmentStartOffset);

    // suppress small difference values before looking for oscillations
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // init the score array
    double[] scores = new double[rowCount];

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // add names into the returned events
    foreach (var ae in predictedEvents)
    {
        //rowtop, rowWidth
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        int halfDct = dctLength / 2;
        for (int i = eventStart - halfDct; i < eventStart + eventWidth - halfDct; i += step)
        {
            // The scan window [i, i + dctLength) is centred on the event, so for events close to the start or end
            // of the recording it extends beyond the score arrays. Such windows cannot be analysed and indexing
            // scores[i + j] for them would be out of range, so skip them. Skipping (rather than clamping the
            // start) keeps the positions of all valid windows unchanged.
            if (i < 0 || i + dctLength > scores.Length || i + dctLength > differenceScores.Length)
            {
                continue;
            }

            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out _, out var period, out var intensity);

            bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;

            //Console.WriteLine($"step={i} period={period:f4}");
            if (!periodWithinBounds)
            {
                continue;
            }

            // lay down score for sample length, keeping the largest intensity seen for each frame
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = "L.b";
            ae.Score_MaxInEvent = maximumIntensity;
            confirmedEvents.Add(ae);
        }
    }

    //######################################################################
    // plot of the oscillation scores against the intensity threshold
    var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

    //DEBUG IMAGE this recognizer only. MUST set false for deployment.
    Image debugImage = null;
    if (drawDebugImage)
    {
        // display a variety of debug score arrays
        //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
        //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
        //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
        //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
        DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };

        // other debug plots
        //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
        debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for more easy interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
} // end method ConvertODScores2Events()

/*
 * public static double PeakEntropy(double[] array)
 * {
 *     bool[] peaks = DataTools.GetPeaks(array);
 *     int peakCount = DataTools.CountTrues(peaks);
 *     //set up histogram of peak energies
 *     double[] histogram = new double[peakCount];
 *     int count = 0;
 *     for (int k = 0; k < array.Length; k++)
 *     {
 *         if (peaks[k])
 *         {
 *             histogram[count] = array[k];
 *             count++;
 *         }
 *     }
 *     histogram = DataTools.NormaliseMatrixValues(histogram);
 *     histogram = DataTools.Normalise2Probabilites(histogram);
 *     double normFactor = Math.Log(histogram.Length) / DataTools.ln2; //normalize for length of the array
 *     double entropy = DataTools.Entropy(histogram) / normFactor;
 *     return entropy;
 * }
 *
 */

/// <summary>
/// Returns the periodicity in an array of values.
/// The array is cross-correlated with itself, the mean is removed and a DCT is taken. After discarding the first three
/// coefficients, non-peak values and normalised values below 0.2, the three largest remaining coefficients are converted to periods.
/// Side effects: writes a bar graph of the coefficients and logs the three selected indices.
/// </summary>
/// <param name="array">The values to analyse.</param>
/// <returns>Three periods, strongest coefficient first; a period is zero when the selected index is zero.</returns>
public static double[] PeriodicityAnalysis(double[] array)
{
    const int lowCoefficientsToDrop = 3;
    const double minimumPeakValue = 0.2;
    const int peakCount = 3;

    //DataTools.writeBarGraph(array);

    // NOTE(review): the original comment here read "do 2/3rds of maximum possible lag"; the lag range is decided
    // inside MyCrossCorrelation, which is not visible from this method - confirm.
    var correlation = AutoAndCrossCorrelation.MyCrossCorrelation(array, array);
    int dctLength = correlation.Length;
    correlation = DataTools.SubtractMean(correlation);

    //DataTools.writeBarGraph(A);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);
    double[] dct = MFCCStuff.DCT(correlation, cosines);

    // convert to absolute values
    for (int k = 0; k < dctLength; k++)
    {
        dct[k] = Math.Abs(dct[k]);
    }

    //DataTools.writeBarGraph(dct);

    // remove low freq oscillations from consideration
    for (int k = 0; k < lowCoefficientsToDrop; k++)
    {
        dct[k] = 0.0;
    }

    dct = DataTools.normalise2UnitLength(dct);
    var peaks = DataTools.GetPeaks(dct);

    // remove non-peak values and low values
    for (int k = 0; k < dctLength; k++)
    {
        bool keep = peaks[k] && !(dct[k] < minimumPeakValue);
        if (!keep)
        {
            dct[k] = 0.0;
        }
    }

    DataTools.writeBarGraph(dct);

    // get periodicity of highest three values
    var period = new double[peakCount];
    var maxIndex = new double[peakCount];
    for (int n = 0; n < peakCount; n++)
    {
        int indexOfMaxValue = DataTools.GetMaxIndex(dct);
        maxIndex[n] = indexOfMaxValue;

        //double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi

        // an index of zero has no period; otherwise times 2 because index = Pi and not 2Pi
        period[n] = indexOfMaxValue == 0
            ? 0.0
            : dctLength / (double)indexOfMaxValue * 2;

        // remove value for next iteration
        dct[indexOfMaxValue] = 0.0;
    }

    LoggedConsole.WriteLine("Max indices = {0:f0}, {1:f0}, {2:f0}.", maxIndex[0], maxIndex[1], maxIndex[2]);
    return period;
}
/*
 *
 * /// <summary>
 * /// Detects oscillations in a given freq bin.
 * /// there are several important parameters for tuning.
 * /// a) DCTLength: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity.
 * ///     Do not want too short because too small a range of oscillations
 * /// b) DCTindex: Sets lower bound for oscillations of interest. Index refers to array of coeff returned by DCT.
 * ///     Array has same length as the length of the DCT. Low freq oscillations occur more often by chance. Want to exclude them.
 * /// c) MinAmplitude: minimum acceptable value of a DCT coefficient if hit is to be accepted.
 * ///     The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
 * /// </summary>
 * /// <param name="minBin">min freq bin of search band</param>
 * /// <param name="maxBin">max freq bin of search band</param>
 * /// <param name="dctLength">number of values</param>
 * /// <param name="DCTindex">Sets lower bound for oscillations of interest.</param>
 * /// <param name="minAmplitude">threshold - do not accept a DCT value if its amplitude is less than this threshold</param>
 * public static Double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz,
 *                                            double dctDuration, int minOscilFreq, int maxOscilFreq, double minAmplitude)
 * {
 *     int minBin = (int)(minHz / sonogram.FBinWidth);
 *     int maxBin = (int)(maxHz / sonogram.FBinWidth);
 *
 *     int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);
 *     int minIndex = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
 *     int maxIndex = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
 *     if (maxIndex > dctLength) maxIndex = dctLength; //safety check in case of future changes to code.
 *
 *     int rows = sonogram.Data.GetLength(0);
 *     int cols = sonogram.Data.GetLength(1);
 *     Double[,] hits = new Double[rows, cols];
 *     Double[,] matrix = sonogram.Data;
 *     //matrix = ImageTools.WienerFilter(sonogram.Data, 3);// DO NOT USE - SMUDGES EVERYTHING
 *
 *
 *     double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
 *     //following two lines write matrix of cos values for checking.
 *     //string fPath = @"C:\SensorNetworks\Sonograms\cosines.txt";
 *     //FileTools.WriteMatrix2File_Formatted(cosines, fPath, "F3");
 *
 *     //following two lines write bmp image of cos values for checking.
 *     //string fPath = @"C:\SensorNetworks\Output\cosines.bmp";
 *     //ImageTools.DrawMatrix(cosines, fPath);
 *
 *
 *
 *     // traverse columns - skip DC column
 *
 *
 *     for (int c = minBin; c <= maxBin; c++) {
 *         for (int r = 0; r < rows - dctLength; r++)
 *         {
 *             var array = new double[dctLength];
 *             //accumulate J columns of values
 *             for (int i = 0; i < dctLength; i++)
 *                 for (int j = 0; j < 5; j++) array[i] += matrix[r + i, c + j];
 *
 *             array = DataTools.SubtractMean(array);
 *             // DataTools.writeBarGraph(array);
 *
 *             double[] dct = MFCCStuff.DCT(array, cosines);
 *             for (int i = 0; i < dctLength; i++) dct[i] = Math.Abs(dct[i]);//convert to absolute values
 *             dct[0] = 0.0; dct[1] = 0.0; dct[2] = 0.0; dct[3] = 0.0; dct[4] = 0.0;//remove low freq oscillations from consideration
 *             dct = DataTools.normalise2UnitLength(dct);
 *             //dct = DataTools.NormaliseMatrixValues(dct); //another option to NormaliseMatrixValues
 *             int indexOfMaxValue = DataTools.GetMaxIndex(dct);
 *             double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
 *
 *             //DataTools.MinMax(dct, out min, out max);
 *             // DataTools.writeBarGraph(dct);
 *
 *             //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
 *             if ((indexOfMaxValue >= minIndex) && (indexOfMaxValue <= maxIndex) && (dct[indexOfMaxValue] > minAmplitude))
 *             {
 *                 for (int i = 0; i < dctLength; i++) hits[r + i, c] = oscilFreq;
 *             }
 *             r += 5; //skip rows
 *         }
 *         c++; //do alternate columns
 *     }
 *     return hits;
 * }
 */

/// <summary>
/// Looks for oscillations in a score array using a DCT.
/// A window of DCT length is taken at every second position of the array. The mean is removed, the DCT is taken, the first
/// five coefficients are discarded and, when the strongest remaining coefficient lies in the requested oscillation range
/// and exceeds the threshold, its value is written to that position and the following (skipped) position of the result.
/// </summary>
/// <param name="scoreArray">The scores to analyse.</param>
/// <param name="dctDuration">Duration of the DCT in seconds.</param>
/// <param name="timeScale">Multiplied by the DCT duration to give the DCT length in array elements.</param>
/// <param name="dctThreshold">The strongest DCT coefficient must exceed this value.</param>
/// <param name="normaliseDCT">When true the DCT coefficients are normalised to unit length before the threshold test.</param>
/// <param name="minOscilFreq">Minimum oscillation frequency of interest.</param>
/// <param name="maxOscilFreq">Maximum oscillation frequency of interest.</param>
/// <returns>An array the length of the input holding the strongest DCT coefficient at accepted positions and zero elsewhere.</returns>
public static double[] DetectOscillationsInScoreArray(double[] scoreArray, double dctDuration, double timeScale, double dctThreshold, bool normaliseDCT, int minOscilFreq, int maxOscilFreq)
{
    const int lowCoefficientsToDrop = 5;

    int dctLength = (int)Math.Round(timeScale * dctDuration);

    // multiply by 2 because index = Pi and not 2Pi
    int minIndex = (int)(minOscilFreq * dctDuration * 2);
    int maxIndex = (int)(maxOscilFreq * dctDuration * 2);

    // safety check in case of future changes to code.
    maxIndex = Math.Min(maxIndex, dctLength);

    int length = scoreArray.Length;
    var hits = new double[length];

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    //following two lines write matrix of cos values for checking.
    //string fPath = @"C:\SensorNetworks\Sonograms\cosines.txt";
    //FileTools.WriteMatrix2File_Formatted(cosines, fPath, "F3");

    //following two lines write bmp image of cos values for checking.
    //string fPath = @"C:\SensorNetworks\Output\cosines.bmp";
    //ImageTools.DrawMatrix(cosines, fPath);

    // skip rows: only every second position starts a window
    for (int r = 0; r < length - dctLength; r += 2)
    {
        // transfer values
        var window = new double[dctLength];
        Array.Copy(scoreArray, r, window, 0, dctLength);

        window = DataTools.SubtractMean(window);

        // DataTools.writeBarGraph(array);
        double[] dct = MFCCStuff.DCT(window, cosines);

        // convert to absolute values
        for (int k = 0; k < dctLength; k++)
        {
            dct[k] = Math.Abs(dct[k]);
        }

        // remove low freq oscillations from consideration
        Array.Clear(dct, 0, lowCoefficientsToDrop);

        if (normaliseDCT)
        {
            dct = DataTools.normalise2UnitLength(dct);
        }

        int indexOfMaxValue = DataTools.GetMaxIndex(dct);

        // DataTools.writeBarGraph(dct);
        // The oscillation frequency of the peak would be indexOfMaxValue / dctDuration * 0.5
        // (times 0.5 because index = Pi and not 2Pi); it is not needed for the result.

        // mark DCT location, only if oscillation freq is in correct range and amplitude
        if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold)
        {
            double strongest = dct[indexOfMaxValue];
            hits[r] = strongest;

            // because skipping rows.
            hits[r + 1] = strongest;

            //for (int i = 0; i < dctLength; i++) if (hits[r + i] < dct[indexOfMaxValue]) hits[r + i] = dct[indexOfMaxValue];
        }
    }

    return hits;
}
/*
 * /// <summary>
 * /// FINDS OSCILLATIONS IN A SONOGRAM
 * /// SAME METHOD AS ABOVE BUT .....
 * /// 1) WITHOUT CALCULATING THE COMPUTATION TIME
 * /// 2) WITHOUT DOING SEGMENTATION
 * /// </summary>
 * /// <param name="sonogram">sonogram derived from the recording</param>
 * /// <param name="minHz">min bound freq band to search</param>
 * /// <param name="maxHz">max bound freq band to search</param>
 * /// <param name="dctDuration">duration of DCT in seconds</param>
 * /// <param name="dctThreshold">minimum amplitude of DCT </param>
 * /// <param name="minOscilFreq">ignore oscillation frequencies below this threshold</param>
 * /// <param name="maxOscilFreq">ignore oscillation frequencies greater than this </param>
 * /// <param name="scoreThreshold">used for FP/FN</param>
 * /// <param name="minDuration">ignore hits whose duration is shorter than this</param>
 * /// <param name="maxDuration">ignore hits whose duration is longer than this</param>
 * /// <param name="scores">return an array of scores over the entire recording</param>
 * /// <param name="events">return a list of acoustic events</param>
 * /// <param name="hits">a matrix to be superimposed over the final sonogram which shows where the DCT coefficients exceeded the threshold</param>
 * public static void Execute(SpectrogramStandard sonogram, int minHz, int maxHz,
 *                            double dctDuration, double dctThreshold, bool normaliseDCT, double minOscilFreq, double maxOscilFreq,
 *                            double scoreThreshold, double minDuration, double maxDuration,
 *                            out double[] scores, out List<AcousticEvent> events, out Double[,] hits, out double[] oscFreq)
 * {
 *     //convert the entire recording to an acoustic event - this is the legacy of previous experimentation!!!!!!!!!
 *     List<AcousticEvent> segmentEvents = new List<AcousticEvent>();
 *     var ae = new AcousticEvent(0.0, sonogram.Duration.TotalSeconds, minHz, maxHz);
 *     ae.SetTimeAndFreqScales(sonogram.FramesPerSecond, sonogram.FBinWidth);
 *     segmentEvents.Add(ae);
 *
 *     //DETECT OSCILLATIONS
 *     hits = DetectOscillationsInSonogram(sonogram, minHz, maxHz, dctDuration, dctThreshold, normaliseDCT, minOscilFreq, maxOscilFreq, segmentEvents);
 *     hits = RemoveIsolatedOscillations(hits);
 *
 *     //EXTRACT SCORES AND ACOUSTIC EVENTS
 *     scores = GetOscillationScores(hits, minHz, maxHz, sonogram.FBinWidth);//scores = fraction of BW bins in each row that have an oscilation hit.
 *     scores = DataTools.filterMovingAverage(scores, 3);
 *     oscFreq = GetOscillationFrequency(hits, minHz, maxHz, sonogram.FBinWidth);
 *     events = ConvertODScores2Events(scores, oscFreq, minHz, maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, sonogram.Configuration.FreqBinCount, scoreThreshold,
 *                                     minDuration, maxDuration, sonogram.Configuration.SourceFName);
 * }
 *
 */

/// <summary>
/// Detects oscillations in the frequency band of a sonogram, restricted to the time spans of the supplied events.
/// There are several important parameters for tuning.
/// a) dctDuration: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity.
///    Do not want too short because too small a range of oscillations.
/// b) dctThreshold: minimum acceptable value of a DCT coefficient if hit is to be accepted.
///    The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
/// c) Min and Max Oscillations: Sets lower and upper bound for oscillations of interest.
///    Low freq oscillations occur more often by chance. Want to exclude them.
/// Every second frequency bin is analysed and the result is copied to the neighbouring bin.
/// Within a bin the DCT window start advances seven frames at a time.
/// </summary>
/// <param name="sonogram">The spectrogram whose <c>Data</c> matrix is indexed [frame, frequency bin].</param>
/// <param name="minHz">Lower bound of the search band in Hertz. Converted to a bin index by dividing by <c>sonogram.FBinWidth</c>.</param>
/// <param name="maxHz">Upper bound of the search band in Hertz. Converted to a bin index by dividing by <c>sonogram.FBinWidth</c>.</param>
/// <param name="dctDuration">Duration of the DCT window. Multiplied by <c>sonogram.FramesPerSecond</c> to obtain the window length in frames.</param>
/// <param name="dctThreshold">A hit is accepted only when the largest DCT coefficient is greater than this value.</param>
/// <param name="normaliseDCT">When true, the coefficients are passed through <c>DataTools.normalise2UnitLength</c> before the maximum is located.</param>
/// <param name="minOscilFreq">Ignore oscillation frequencies below this value.</param>
/// <param name="maxOscilFreq">Ignore oscillation frequencies above this value.</param>
/// <param name="events">The events whose time spans are searched. When null the method returns null.</param>
/// <returns>A matrix of the same dimensions as <c>sonogram.Data</c> holding the detected oscillation frequency at hit locations and zero elsewhere, or null when <paramref name="events"/> is null.</returns>
public static double[,] DetectOscillationsInSonogram(SpectrogramStandard sonogram, int minHz, int maxHz, double dctDuration, double dctThreshold, bool normaliseDCT, double minOscilFreq, double maxOscilFreq, List<AcousticEvent> events)
{
    if (events == null)
    {
        return null;
    }

    // number of adjacent frequency bins averaged into each DCT input value
    const int binsToAverage = 5;

    // number of leading DCT coefficients removed from consideration
    const int lowFreqBuffer = 5;

    // frames skipped between DCT windows (previously written as r++ combined with r += 6)
    const int frameStep = 7;

    // only alternate bins are analysed (previously written as c++ twice per pass)
    const int binStep = 2;

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);

    int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);

    // multiply by 2 because index = Pi and not 2Pi
    int minIndex = (int)(minOscilFreq * dctDuration * 2);
    int maxIndex = (int)(maxOscilFreq * dctDuration * 2);
    if (maxIndex > dctLength)
    {
        maxIndex = dctLength; //safety check in case of future changes to code.
    }

    int rows = sonogram.Data.GetLength(0);
    int cols = sonogram.Data.GetLength(1);
    double[,] hits = new double[rows, cols];

    //set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    // NOTE: the cosine matrix used to be written to C:\SensorNetworks\Output\cosines.bmp on every call.
    // That was debugging output tied to one developer's machine and has been removed.

    // keep the search band inside the matrix
    int firstBin = Math.Max(minBin, 0);
    int lastBin = Math.Min(maxBin, cols - 1);

    foreach (AcousticEvent av in events)
    {
        int startRow = (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond);
        int endRow = (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond);
        if (endRow >= sonogram.FrameCount)
        {
            endRow = sonogram.FrameCount - 1;
        }

        // leave room for a complete DCT window
        endRow -= dctLength;
        if (endRow <= startRow)
        {
            endRow = startRow + 1; //want minimum of one row
        }

        // traverse columns
        for (int c = firstBin; c <= lastBin; c += binStep)
        {
            // near the top of the spectrum fewer than five bins remain; average only those that exist
            int binCount = Math.Min(binsToAverage, cols - c);

            for (int r = startRow; r < endRow; r += frameStep)
            {
                // the "minimum of one row" rule above can place a window past the last frame
                if (r + dctLength > rows)
                {
                    break;
                }

                //average adjacent bins for each frame in the window
                var array = new double[dctLength];
                for (int i = 0; i < dctLength; i++)
                {
                    for (int j = 0; j < binCount; j++)
                    {
                        array[i] += sonogram.Data[r + i, c + j];
                    }

                    array[i] /= binCount;
                }

                array = DataTools.SubtractMean(array);

                double[] dct = MFCCStuff.DCT(array, cosines);

                //convert to absolute values
                for (int i = 0; i < dctLength; i++)
                {
                    dct[i] = Math.Abs(dct[i]);
                }

                //remove low freq oscillations from consideration
                int zeroCount = Math.Min(lowFreqBuffer, dct.Length);
                for (int i = 0; i < zeroCount; i++)
                {
                    dct[i] = 0.0;
                }

                if (normaliseDCT)
                {
                    dct = DataTools.normalise2UnitLength(dct);
                }

                int indexOfMaxValue = DataTools.GetMaxIndex(dct);

                //Times 0.5 because index = Pi and not 2Pi
                double oscilFreq = indexOfMaxValue / dctDuration * 0.5;

                //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
                if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dct[indexOfMaxValue] > dctThreshold)
                {
                    // MUST also write the neighbouring column because only alternate columns are analysed
                    bool hasNeighbour = c + 1 < cols;
                    for (int i = 0; i < dctLength; i++)
                    {
                        hits[r + i, c] = oscilFreq;
                        if (hasNeighbour)
                        {
                            hits[r + i, c + 1] = oscilFreq;
                        }
                    }
                }
            }
        }
    }

    return hits;
}
/// <summary>
/// Scores oscillations in a one-dimensional array.
/// A DCT window is anchored at every position that is not lower than either neighbour.
/// The window is mean-subtracted, transformed, converted to magnitudes, stripped of its lowest coefficients
/// and normalised to unit length. When the largest coefficient lies in the requested index range and exceeds
/// <paramref name="dctThreshold"/>, its value is laid over the whole window, keeping the larger value wherever windows overlap.
/// </summary>
/// <param name="ipArray">The input array to be searched.</param>
/// <param name="framesPerSecond">Number of array elements per second. Multiplied by <paramref name="dctDuration"/> to get the window length.</param>
/// <param name="dctDuration">Duration of the DCT window.</param>
/// <param name="minOscilFreq">Lower bound of the oscillation frequencies of interest.</param>
/// <param name="maxOscilFreq">Upper bound of the oscillation frequencies of interest.</param>
/// <param name="dctThreshold">The largest coefficient must be greater than this value to be accepted.</param>
/// <returns>An array of scores having the same length as <paramref name="ipArray"/>, or null when the upper index bound exceeds the window length.</returns>
public static double[] DetectOscillations(double[] ipArray, double framesPerSecond, double dctDuration, double minOscilFreq, double maxOscilFreq, double dctThreshold)
{
    int windowLength = (int)Math.Round(framesPerSecond * dctDuration);

    // The factor of 2 is required because index = Pi and not 2Pi
    int lowestIndex = (int)(minOscilFreq * dctDuration * 2);
    int highestIndex = (int)(maxOscilFreq * dctDuration * 2);

    //safety check
    if (highestIndex > windowLength)
    {
        return null;
    }

    int sampleCount = ipArray.Length;
    var scores = new double[sampleCount];

    // table of cosine coefficients shared by every window
    double[,] cosineTable = MFCCStuff.Cosines(windowLength, windowLength);

    // coefficients below this index are treated as low freq oscillations and discarded
    int discardCount = lowestIndex / 4;

    int startLimit = sampleCount - windowLength;
    for (int start = 1; start < startLimit; start++)
    {
        // only anchor a window where the current location is a peak
        double here = ipArray[start];
        if (here < ipArray[start - 1] || here < ipArray[start + 1])
        {
            continue;
        }

        // extract the window and remove its mean ready for the DCT
        double[] window = DataTools.Subarray(ipArray, start, windowLength);
        window = DataTools.SubtractMean(window);
        double[] coefficients = MFCCStuff.DCT(window, cosineTable);

        // the sign only reflects phase, so work with magnitudes
        for (int k = 0; k < windowLength; k++)
        {
            coefficients[k] = Math.Abs(coefficients[k]);
        }

        for (int k = 0; k < discardCount; k++)
        {
            coefficients[k] = 0.0;
        }

        coefficients = DataTools.normalise2UnitLength(coefficients);
        int peakIndex = DataTools.GetMaxIndex(coefficients);

        // An alternative scoring (normalise to probabilities, then sum the area between the index bounds)
        // was tried but did not work well because it requires very fine tuning of thresholds.

        // reject windows whose dominant oscillation is out of range
        if (peakIndex < lowestIndex || peakIndex > highestIndex)
        {
            continue;
        }

        // reject windows whose dominant oscillation is too weak
        double peakValue = coefficients[peakIndex];
        if (!(peakValue > dctThreshold))
        {
            continue;
        }

        // lay the score over the window, never lowering a score that is already there
        for (int offset = 0; offset < windowLength; offset++)
        {
            int target = start + offset;
            if (scores[target] < peakValue)
            {
                scores[target] = peakValue;
            }
        }
    }

    return scores;
}
/// <summary>
/// Detects oscillations in the frequency bins of a sonogram.
/// There are several important parameters for tuning.
/// a) dctDuration: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity.
///    Do not want too short because too small a range of oscillations.
/// b) minOscilFreq: Sets lower bound for oscillations of interest. It determines an index into the array of coefficients returned by the DCT.
///    Low freq oscillations occur more often by chance. Want to exclude them.
/// c) dctThreshold: minimum acceptable value of a DCT coefficient if hit is to be accepted.
///    The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
/// Only alternate frequency bins are analysed and the DCT window start advances six frames at a time.
/// </summary>
/// <param name="sonogram">The spectrogram whose <c>Data</c> matrix is indexed [frame, frequency bin].</param>
/// <param name="minHz">Lower bound of the search band in Hertz. Converted to a bin index by dividing by <c>sonogram.FBinWidth</c>.</param>
/// <param name="maxHz">Upper bound of the search band in Hertz. Converted to a bin index by dividing by <c>sonogram.FBinWidth</c>.</param>
/// <param name="dctDuration">Duration of the DCT window. Multiplied by <c>sonogram.FramesPerSecond</c> to obtain the window length in frames.</param>
/// <param name="minOscilFreq">Ignore oscillation frequencies below this value.</param>
/// <param name="maxOscilFreq">Ignore oscillation frequencies above this value.</param>
/// <param name="dctThreshold">threshold - do not accept a DCT coefficient if its value is less than this threshold.</param>
/// <returns>
/// A matrix of the same dimensions as <c>sonogram.Data</c>. Hit locations hold the midpoint of the requested oscillation
/// range and all other cells are zero. Returns null when the upper index bound exceeds the DCT length.
/// </returns>
public static double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz, double dctDuration, int minOscilFreq, int maxOscilFreq, double dctThreshold)
{
    // frames between successive DCT windows (previously written as r++ combined with r += 5)
    const int frameStep = 6;

    // only alternate bins are analysed (previously written as c++ twice per pass)
    const int binStep = 2;

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);

    int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);

    // multiply by 2 because index = Pi and not 2Pi
    int minIndex = (int)(minOscilFreq * dctDuration * 2);
    int maxIndex = (int)(maxOscilFreq * dctDuration * 2);

    // integer midpoint of the requested range - this is the value written to the hits matrix
    int midOscilFreq = minOscilFreq + ((maxOscilFreq - minOscilFreq) / 2);

    //safety check
    if (maxIndex > dctLength)
    {
        return null;
    }

    double[,] matrix = sonogram.Data;
    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);
    double[,] hits = new double[rows, cols];

    //set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    // coefficients below this index are regarded as low freq oscillations and removed
    int thresholdIndex = minIndex / 4;

    // keep the search band inside the matrix so that a band reaching the top bin cannot overrun it
    int firstBin = Math.Max(minBin, 0);
    int lastBin = Math.Min(maxBin, cols - 1);

    for (int c = firstBin; c <= lastBin; c += binStep)
    {
        var dctArray = new double[dctLength];

        for (int r = 0; r < rows - dctLength; r += frameStep)
        {
            // extract array and ready for DCT
            for (int i = 0; i < dctLength; i++)
            {
                dctArray[i] = matrix[r + i, c];
            }

            double[] centred = DataTools.SubtractMean(dctArray);
            double[] dctCoeff = MFCCStuff.DCT(centred, cosines);

            // convert to absolute values because not interested in negative values due to phase.
            for (int i = 0; i < dctLength; i++)
            {
                dctCoeff[i] = Math.Abs(dctCoeff[i]);
            }

            // remove low freq oscillations from consideration
            for (int i = 0; i < thresholdIndex; i++)
            {
                dctCoeff[i] = 0.0;
            }

            dctCoeff = DataTools.normalise2UnitLength(dctCoeff);
            int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

            // An alternative scoring (normalise to probabilities, then sum the area between minIndex and maxIndex)
            // was tried but did not work well because it requires very fine tuning of thresholds.

            //mark DCT location, only if oscillation freq is in correct range and amplitude
            if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold)
            {
                for (int i = 0; i < dctLength; i++)
                {
                    hits[r + i, c] = midOscilFreq;
                }
            }
        }
    }

    return hits;
}
/// <summary>
/// Detects oscillations in the frequency bins of a sonogram.
/// There are several important parameters for tuning.
/// a) dctDuration: Good values are 0.25 to 0.50 sec. Do not want too long because DCT requires stationarity.
///    Do not want too short because too small a range of oscillations.
/// b) minOscilFreq: Sets lower bound for oscillations of interest. It determines an index into the array of coefficients returned by the DCT.
///    Low freq oscillations occur more often by chance. Want to exclude them.
/// c) dctThreshold: minimum acceptable value of a DCT coefficient if hit is to be accepted.
///    The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
/// Only alternate frequency bins are analysed and the DCT window start advances six frames at a time.
/// </summary>
/// <param name="sonogram">A spectrogram whose <c>Data</c> matrix is indexed [frame, frequency bin].</param>
/// <param name="minHz">Lower bound of the search band in Hertz. Converted to a bin index by dividing by <c>sonogram.FBinWidth</c>.</param>
/// <param name="maxHz">Upper bound of the search band in Hertz. Converted to a bin index by dividing by <c>sonogram.FBinWidth</c>.</param>
/// <param name="dctDuration">Duration of the DCT window. Multiplied by <c>sonogram.FramesPerSecond</c> to obtain the window length in frames.</param>
/// <param name="minOscilFreq">minimum oscillation freq.</param>
/// <param name="maxOscilFreq">maximum oscillation freq.</param>
/// <param name="dctThreshold">threshold - do not accept a DCT coefficient if its value is less than this threshold.</param>
/// <returns>
/// A matrix of the same dimensions as <c>sonogram.Data</c>. Hit locations hold the midpoint of the requested oscillation
/// range and all other cells are zero. Returns null when the upper index bound exceeds the DCT length.
/// </returns>
public static double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz, double dctDuration, int minOscilFreq, int maxOscilFreq, double dctThreshold)
{
    // frames between successive DCT windows (previously written as r++ combined with r += 5)
    const int frameStep = 6;

    // only alternate bins are analysed (previously written as c++ twice per pass)
    const int binStep = 2;

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);

    int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);

    // multiply by 2 because index = Pi and not 2Pi
    int minIndex = (int)(minOscilFreq * dctDuration * 2);
    int maxIndex = (int)(maxOscilFreq * dctDuration * 2);

    // integer midpoint of the requested range - this is the value written to the hits matrix
    int midOscilFreq = minOscilFreq + ((maxOscilFreq - minOscilFreq) / 2);

    //safety check
    if (maxIndex > dctLength)
    {
        return null;
    }

    double[,] matrix = sonogram.Data;
    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);
    double[,] hits = new double[rows, cols];

    //set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    // lower bound handed to DoDct; it does not change from window to window
    int lowerDctBound = minIndex / 4;

    // Keep the search band inside the matrix so that a band reaching the top bin cannot overrun it.
    // NOTE(review): an earlier comment said "skip DC column", but the first bin analysed is simply minBin.
    int firstBin = Math.Max(minBin, 0);
    int lastBin = Math.Min(maxBin, cols - 1);

    for (int c = firstBin; c <= lastBin; c += binStep)
    {
        var dctArray = new double[dctLength];

        for (int r = 0; r < rows - dctLength; r += frameStep)
        {
            // extract array and ready for DCT
            for (int i = 0; i < dctLength; i++)
            {
                dctArray[i] = matrix[r + i, c];
            }

            var dctCoeff = DoDct(dctArray, cosines, lowerDctBound);
            int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

            //mark DCT location, only if oscillation freq is in correct range and amplitude
            if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold)
            {
                for (int i = 0; i < dctLength; i++)
                {
                    hits[r + i, c] = midOscilFreq;
                }
            }
        }
    }

    return hits;
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// The amplitude in the configured frequency band is averaged per frame, its baseline is removed,
/// and a DCT is used to look for oscillations whose period lies between the configured bounds.
/// The resulting score array is converted to acoustic events.
/// </summary>
/// <param name="recording">The audio segment to analyse. Its sample rate must be 22050 Hz.</param>
/// <param name="configuration">Recognizer settings: frequency band, oscillation bounds and thresholds.</param>
/// <param name="segmentStartOffset">Start of this segment; passed to the event conversion and recorded on every event.</param>
/// <param name="getSpectralIndexes">Not used by this recognizer.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram when <c>MainEntry.InDEBUG</c> is true.</param>
/// <param name="imageWidth">Not used by this recognizer.</param>
/// <returns>The sonogram, the score plot and the detected events. <c>Hits</c> is always null.</returns>
/// <exception cref="InvalidOperationException">The sample rate is not 22050 Hz, or the minimum oscillation frequency is not positive.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    const int frameSize = 256;
    const double windowOverlap = 0.0;

    double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ignore oscillations below this threshold freq
    int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

    // ignore oscillations above this threshold freq
    int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

    // min duration of event in seconds
    double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

    // max duration of event in seconds
    double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

    // min amplitude for a frame to be considered
    double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

    // min score for an acceptable event
    double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    // The DCT duration is derived from 1 / minOscilFreq. A zero or negative value yields an infinite or
    // negative window length, so reject it here with a clear message.
    if (minOscilFreq <= 0)
    {
        throw new InvalidOperationException("MinOscilFreq must be greater than zero");
    }

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    var recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(maxHz / freqBinWidth) + 1;

    // With zero window overlap the frame rate (sample rate / window size) has the same value as the bin width.
    double framesPerSecond = freqBinWidth;

    double minPeriod = 1 / (double)maxOscilFreq;
    double maxPeriod = 1 / (double)minOscilFreq;

    // duration of DCT in seconds - set to five times the expected maximum period
    double dctDuration = 5 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // remove baseline from amplitude array
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // remove hi freq content from amplitude array
    var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

    // ii: SEARCH FOR OSCILLATIONS
    var dctScores = new double[highPassFilteredSignal.Length];
    const int step = 2;
    for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
    {
        if (highPassFilteredSignal[i] < decibelThreshold)
        {
            continue;
        }

        double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

        // Look for oscillations in the highPassFilteredSignal. The oscillation frequency is not needed here.
        Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out _, out var period, out var intensity);

        bool periodWithinBounds = period > minPeriod && period < maxPeriod;
        if (!periodWithinBounds)
        {
            continue;
        }

        if (intensity < dctThreshold)
        {
            continue;
        }

        //lay down score for sample length, but only over frames that are loud enough
        for (int j = 0; j < dctLength; j++)
        {
            if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
            {
                dctScores[i + j] = intensity;
            }
        }
    }

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        eventThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = speciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.Name = abbreviatedSpeciesName;
    });

    var plot = new Plot(this.DisplayName, dctScores, eventThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);

        DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

        DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
        var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { ampltdPlot, lowPassPlot, demeanedPlot, plot };
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));

        // release the image as soon as it has been written to disk
        using (Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null))
        {
            debugImage.Save(debugPath.FullName);
        }
    }

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = null,
        Plots = plots,
        Events = acousticEvents,
    };
}