/// <summary>
/// Detects harmonics in a sub-band of a spectrogram.
/// The matrix is assumed to come from a rotated spectrogram, so that each matrix row is one spectral frame.
/// Developed for the GenericRecognizer of harmonics.
/// WARNING: As of March 2020 every analysed frame is first replaced by the average of five adjacent frames to reduce noise.
/// This requires that the frequency of any potential formants is not changing rapidly,
/// so the method may not be suitable for detecting human speech. Reducing the frame step can compensate.
/// Because of the five-frame window, the first two and the last two frames are never analysed
/// and their entries in all three returned arrays keep their initial value of zero.
/// </summary>
/// <param name="m">spectrogram data matrix.</param>
/// <param name="dBThreshold">Minimum sound level. Frames whose averaged maximum is below it get no intensity or index.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
{
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    // cosine coefficients are shared by the DCT of every frame
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // per-frame outputs: decibels, formant intensity and index of the maximum DCT coefficient
    var decibels = new double[frameCount];
    var harmonicIntensity = new double[frameCount];
    var peakIndices = new int[frameCount];

    const int lowerDctBound = 2;

    for (int t = 2; t < frameCount - 2; t++)
    {
        // the five frames centred on t
        double[][] window =
        {
            MatrixTools.GetRow(m, t - 2),
            MatrixTools.GetRow(m, t - 1),
            MatrixTools.GetRow(m, t),
            MatrixTools.GetRow(m, t + 1),
            MatrixTools.GetRow(m, t + 2),
        };

        var smoothed = new double[binCount];
        for (int bin = 0; bin < binCount; bin++)
        {
            smoothed[bin] = (window[0][bin] + window[1][bin] + window[2][bin] + window[3][bin] + window[4][bin]) / 5;
        }

        double loudest = smoothed.Max();
        decibels[t] = loudest;
        if (loudest < dBThreshold)
        {
            continue;
        }

        // xr has twice the length of the frame and is symmetrical, so only the first half is required.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(smoothed);

        // The xcorr values would normally be normalised for overlap count, i.e. xr[i] / (binCount - i).
        // For harmonics this introduces too much noise - the less overlapped values need less weight -
        // so the values are copied unchanged.
        var halfXr = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            halfXr[lag] = xr[lag];
        }

        // DCT across the auto-cross-correlation
        var dct = Oscillations2012.DoDct(halfXr, cosines, lowerDctBound);
        int strongest = DataTools.GetMaxIndex(dct);
        harmonicIntensity[t] = dct[strongest];
        peakIndices[t] = strongest;
    }

    return Tuple.Create(decibels, harmonicIntensity, peakIndices);
}
/// <summary>
/// Detects harmonics in a sub-band of a sonogram.
/// The matrix is assumed to come from a rotated spectrogram, so that each matrix row is one spectral frame.
/// Developed for the GenericRecognizer of harmonics.
/// Each frame is auto-correlated, the first half of the result is normalised for overlap count,
/// and a DCT is taken; the largest DCT coefficient and its index are recorded for the frame.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level. Frames whose maximum is below it keep zero intensity and index.</param>
/// <returns>three arrays: the per-frame maximum value, the per-frame intensity and the per-frame index of the maximum DCT coefficient.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    var frameMaxima = new double[frameCount];
    var formantIntensity = new double[frameCount];
    var maxIndices = new int[frameCount];

    // cosine coefficients are shared by the DCT of every frame
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);
    const int lowerDctBound = 2;

    // frames = rows of matrix
    for (int t = 0; t < frameCount; t++)
    {
        double[] spectrum = MatrixTools.GetRow(m, t);
        double peak = spectrum.Max();
        frameMaxima[t] = peak;
        if (peak < dBThreshold)
        {
            continue;
        }

        // xr has twice the length of the frame and is symmetrical, so only the first half is required.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(spectrum);

        // normalise each lag by the number of overlapping samples that contributed to it
        var normalised = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            int overlap = binCount - lag;
            normalised[lag] = xr[lag] / overlap;
        }

        // DCT across the auto-cross-correlation
        var dct = Oscillations2012.DoDct(normalised, cosines, lowerDctBound);
        int strongest = DataTools.GetMaxIndex(dct);
        formantIntensity[t] = dct[strongest];
        maxIndices[t] = strongest;
    }

    return Tuple.Create(frameMaxima, formantIntensity, maxIndices);
}
} //DetectBarsInTheRowsOfaMatrix()

/// <summary>
/// Detects harmonics in the rows of the passed portion of a sonogram.
/// The matrix is assumed to come from a rotated spectrogram, so that each matrix row is one spectral frame.
/// Was first developed for crow calls.
/// For every frame whose smoothed average level reaches the threshold, the frame is cross-correlated with itself,
/// the three lowest bins of the result are zeroed, and the position of the remaining maximum gives the harmonic period.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level, compared against the moving-average-filtered row averages.</param>
/// <param name="callSpan">Minimum length of call of interest. NOTE: this value is not referenced by the method body.</param>
/// <returns>a tuple of three arrays: the smoothed row averages, the per-frame intensity and the per-frame periodicity.</returns>
public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
{
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    var periodIntensity = new double[frameCount];
    var periods = new double[frameCount];

    // average level of each frame, smoothed over a window of 3
    double[] smoothedLevels = DataTools.filterMovingAverage(MatrixTools.GetRowAverages(m), 3);

    // in real data the lowest bins are dominant and hide other frequency content
    const int zeroBinCount = 3;

    for (int t = 0; t < frameCount; t++)
    {
        if (smoothedLevels[t] < dBThreshold)
        {
            continue;
        }

        double[] frame = MatrixTools.GetRow(m, t);
        double[] spectrum = AutoAndCrossCorrelation.CrossCorr(frame, frame);

        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);
        periodIntensity[t] = spectrum[peakBin];

        // a peak in bin zero carries no period information
        periods[t] = peakBin != 0 ? 2 * binCount / (double)peakBin : 0.0;
    }

    return Tuple.Create(smoothedLevels, periodIntensity, periods);
}
/// <summary>
/// Returns a matrix whose columns consist of autocorrelations of freq bin samples.
/// The signal is converted to z-scores, cut into consecutive non-overlapping samples of
/// <paramref name="sampleLength"/> values, and each sample is auto-correlated.
/// Values left over after the last complete sample are ignored.
/// </summary>
/// <param name="signal">the values to analyse.</param>
/// <param name="sampleLength">the number of values in each non-overlapping sample.</param>
/// <returns>a matrix with sampleLength rows and one column per complete sample.</returns>
public static double[,] GetXcorrByTimeMatrix(double[] signal, int sampleLength)
{
    // Convert the values to z-scores. This is required, otherwise spurious results are obtained.
    double[] zScores = DataTools.Vector2Zscores(signal);

    int patchCount = zScores.Length / sampleLength;
    var result = new double[sampleLength, patchCount];

    for (int column = 0; column < patchCount; column++)
    {
        int offset = column * sampleLength;
        double[] patch = DataTools.Subarray(zScores, offset, sampleLength);
        double[] autocorrelation = AutoAndCrossCorrelation.AutoCorrelationOldJavaVersion(patch);
        MatrixTools.SetColumn(result, column, autocorrelation);
    }

    return result;
}
/// <summary>
/// Cross-correlates every row of the matrix with the row before it and records, per row,
/// the height and the implied period of the strongest cross-correlation peak.
/// The matrix is assumed to come from a rotated spectrogram, so that each matrix row is one spectral frame.
/// Row zero has no predecessor, so its entries in both returned arrays stay zero.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="threshold">NOTE: this value is not referenced by the method body.</param>
/// <param name="zeroBinCount">number of leading bins of each cross-correlation spectrum that are set to zero before the peak search.</param>
/// <returns>a tuple of two arrays: the per-row intensity and the per-row periodicity.</returns>
public static Tuple<double[], double[]> DetectBarsInTheRowsOfaMatrix(double[,] m, double threshold, int zeroBinCount)
{
    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);

    var periodIntensity = new double[rowCount];
    var periods = new double[rowCount];

    // each row is expressed as its difference from its own mean before correlating
    double[] previous = DataTools.DiffFromMean(MatrixTools.GetRow(m, 0));

    for (int r = 1; r < rowCount; r++)
    {
        double[] current = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
        double[] spectrum = AutoAndCrossCorrelation.CrossCorr(previous, current);

        // in real data these bins are dominant and hide other frequency content
        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);
        periodIntensity[r] = spectrum[peakBin];

        // a peak in bin zero carries no period information
        periods[r] = peakBin != 0 ? 2 * colCount / (double)peakBin : 0.0;

        previous = current;
    }

    return Tuple.Create(periodIntensity, periods);
} //DetectBarsInTheRowsOfaMatrix()
/// <summary>
/// Returns a matrix whose columns consist of autocorrelations of freq bin samples.
/// The columns are non-overlapping.
/// </summary>
/// <param name="signal">an array corresponding to one frequency bin.</param>
/// <param name="sampleLength">the length of a sample or patch (non-overlapping) for which the cross-correlation is obtained.</param>
/// <returns>a matrix of size [sampleLength, number of complete samples]; each column is the autocorrelation of one sample.</returns>
public static double[,] GetXcorrByTimeMatrix(double[] signal, int sampleLength)
{
    // Normalise the freq bin values to z-scores. This is required, otherwise spurious results are obtained.
    signal = DataTools.Vector2Zscores(signal);

    // only complete non-overlapping samples are used; any remainder at the end of the signal is dropped
    var completeSamples = signal.Length / sampleLength;
    var correlationsByTime = new double[sampleLength, completeSamples];

    var sampleIndex = 0;
    while (sampleIndex < completeSamples)
    {
        // the autocorrelation has the same length as the sample it was computed from
        var autocorrelation = AutoAndCrossCorrelation.AutoCorrelationOldJavaVersion(
            DataTools.Subarray(signal, sampleIndex * sampleLength, sampleLength));

        MatrixTools.SetColumn(correlationsByTime, sampleIndex, autocorrelation);
        sampleIndex++;
    }

    return correlationsByTime;
}
/// <summary>
/// ################ THE KEY ANALYSIS METHOD.
/// Makes a noise-reduced spectrogram of the recording, cross-correlates two frequency bins located just above 1500 Hz
/// over short runs of frames, and converts the resulting intensity scores into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">the audio file to analyse.</param>
/// <param name="configDict">
/// must contain the keys INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION, MIN_PERIOD and MAX_PERIOD.
/// The values are parsed with the invariant culture, i.e. '.' is the decimal separator regardless of the machine locale.
/// </param>
/// <param name="segmentStartOffset">passed on to the event conversion and to CropEvents.</param>
/// <returns>
/// the sonogram, a hits matrix (allocated but never written, so all zero), a list holding the intensity plot,
/// the predicted events and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // frame size and overlap are fixed here - any values set by the user are ignored
    int frameSize = 128;
    double windowOverlap = 0.5;

    // Config values are machine-readable text, so parse them culture-independently.
    // Parsing with the current culture misreads "0.5" on locales that use ',' as decimal separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"], invariant); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"], invariant); // seconds
    double minPeriod = double.Parse(configDict["MIN_PERIOD"], invariant); // seconds
    double maxPeriod = double.Parse(configDict["MAX_PERIOD"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 256      17640       14.5ms          68.9        68.9    ms              hz              hz
    // 512      17640       29.0ms          34.4        34.4    ms              hz              hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

    // The Xcorrelation-FFT technique requires the number of frames to span to be a power of 2.
    // Two bins starting at the bin that contains midHz are cross-correlated with each other.
    int midHz = 1500;
    int lowerBin = (int)(midHz / freqBinWidth) + 1; //because bin[0] = DC
    int upperBin = lowerBin + 4;
    int lowerHz = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
    int upperHz = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

    double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
    double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
    lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB
    upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB

    // Advance in steps of 1/40 second. The cast truncates, so clamp to at least one frame:
    // with fewer than 40 frames per second the step would otherwise be zero and the division below would throw.
    int step = Math.Max(1, (int)(framesPerSecond / 40));
    int stepCount = rowCount / step;

    // number of frames in each cross-correlated sample (a power of 2, see above)
    int sampleLength = 32;
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // stop as soon as a complete sample can no longer be taken from the arrays
        if (lowerSubarray == null || upperSubarray == null)
        {
            break;
        }

        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 2;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        int maxId = DataTools.GetMaxIndex(spectrum);

        // convert maxId to a period in seconds.
        // When maxId is zero the floating-point division yields infinity, which the range test below rejects
        // for any finite maxPeriod.
        double period = 2 * sampleLength / (double)maxId / framesPerSecond;
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length: keep the highest intensity seen for each frame
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 3);
    intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerHz,
        upperHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    CropEvents(predictedEvents, upperArray, segmentStartOffset);

    // the hits matrix is returned as allocated; nothing in this method writes to it
    var hits = new double[rowCount, colCount];

    var plots = new List<Plot>();
    plots.Add(new Plot("intensity", intensity, intensityThreshold));

    return Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// Scores a sub-band spectrum using the sharpness and height of its highest peaks
/// and the relative heights of peaks near six fixed bin positions.
/// </summary>
/// <param name="subBandSpectrum">the spectrum to score. The fixed bin positions and neighbourhood sizes used here index up to bin 42 for the peak-position test.</param>
/// <param name="windowWidth">the values 2048 and 1024 select different neighbourhood sizes; the reference spectra and peak centres defined here are those for a window of 1024.</param>
/// <returns>
/// three values: [0] the combined score (0.4 * relative peak score + 0.6 * peaks score),
/// [1] the relative peak score, [2] the peaks score.
/// </returns>
public static double[] CalculateScores(double[] subBandSpectrum, int windowWidth)
{
    // TEST TWO: correlation with reference spectra.
    // these spectra are used for scoring when the window size is 1024
    double[] truePositives1 =
    {
        0.0007, 0.0004, 0.0000, 0.0025, 0.0059, 0.0069, 0.0044, 0.0012, 0.0001, 0.0006, 0.0013, 0.0032,
        0.0063, 0.0067, 0.0070, 0.0033, 0.0086, 0.0128, 0.1546, 0.4550, 0.6197, 0.4904, 0.2075, 0.0714,
        0.1171, 0.4654, 0.8634, 1.0000, 0.7099, 0.2960, 0.1335, 0.3526, 0.6966, 0.9215, 0.6628, 0.3047,
        0.0543, 0.0602, 0.0931, 0.1364, 0.1314, 0.1047, 0.0605, 0.0204, 0.0128, 0.0114,
    };
    double[] truePositives2 =
    {
        0.0126, 0.0087, 0.0043, 0.0002, 0.0000, 0.0010, 0.0018, 0.0016, 0.0005, 0.0002, 0.0050, 0.1262,
        0.4054, 0.5111, 0.3937, 0.1196, 0.0156, 0.0136, 0.0840, 0.1598, 0.1691, 0.0967, 0.0171, 0.0152,
        0.0234, 0.3648, 0.8243, 1.0000, 0.6727, 0.2155, 0.0336, 0.0240, 0.2661, 0.6240, 0.7523, 0.5098,
        0.1493, 0.0149, 0.0046, 0.0020, 0.0037, 0.0061, 0.0061, 0.0036, 0.0010, 0.0008,
    };

    // NOTE(review): the correlation score is computed but does not contribute to any returned value.
    var spectrumZscores = NormalDist.Convert2ZScores(subBandSpectrum);
    double score1 = AutoAndCrossCorrelation.CorrelationCoefficient(spectrumZscores, truePositives1);
    double score2 = AutoAndCrossCorrelation.CorrelationCoefficient(spectrumZscores, truePositives2);
    double correlationScore = score2 > score1 ? score2 : score1;

    // TEST THREE: sharpness and height of peaks - score the four highest peaks.
    // Peaks are removed from a working copy as they are scored; side values are read from the untouched spectrum.
    double peaksScore = 0;
    var remaining = (double[])subBandSpectrum.Clone();

    int lowerBound = subBandSpectrum.Length / 4;
    int upperBound = subBandSpectrum.Length * 7 / 8;

    bool isWideWindow = windowWidth == 2048;
    int sideOffset = isWideWindow ? 6 : 3;   // distance from the peak at which the peak's sides are sampled
    int clearRadius = isWideWindow ? 9 : 3;  // extent of the neighbourhood zeroed after scoring a peak

    for (int p = 0; p < 4; p++)
    {
        int peak = DataTools.GetMaxIndex(remaining);

        // peak location cannot be too low or too high.
        // NOTE(review): a rejected peak is not removed from the working copy, so every later pass selects it again.
        if (peak < lowerBound || peak > upperBound)
        {
            continue;
        }

        double peakHeight = remaining[peak];
        double peakSides = (subBandSpectrum[peak - sideOffset] + subBandSpectrum[peak + sideOffset]) / 2;
        peaksScore += peakHeight - peakSides;

        // zero the peak and its neighbourhood so that the next pass finds the next highest peak
        for (int n = 0; n < clearRadius; n++)
        {
            remaining[peak + n] = 0;
            remaining[peak - n] = 0;
        }
    }

    // take average over the four passes
    peaksScore /= 4;

    // TEST FOUR: peak position ratios
    int[] peakLocationCentres = { 2, 5, 19, 22, 27, 33 };
    int nh2 = windowWidth == 1024 ? 3 : 6;
    var relativePeakHeights = new double[6];

    for (int p = 0; p < 6; p++)
    {
        int centre = peakLocationCentres[p];
        double best = -double.MaxValue;
        int bestId = centre;

        // search from four bins below the centre (but not below bin zero) up to three bins above it
        int first = centre - 4 < 0 ? 0 : centre - 4;
        for (int id = first; id < centre + 4; id++)
        {
            if (subBandSpectrum[id] > best)
            {
                best = subBandSpectrum[id];
                bestId = id;
            }
        }

        int lowerPosition = bestId - nh2;
        if (lowerPosition < 0)
        {
            lowerPosition = 0;
        }

        // height of the local maximum relative to the values nh2 bins either side of it
        relativePeakHeights[p] = subBandSpectrum[bestId] - subBandSpectrum[lowerPosition] - subBandSpectrum[bestId + nh2];
    }

    double[] targetHeights = { 0.1, 0.1, 0.5, 0.5, 1.0, 0.6 };
    var actualZscores = NormalDist.Convert2ZScores(relativePeakHeights);
    var targetZscores = NormalDist.Convert2ZScores(targetHeights);
    double relativePeakScore = AutoAndCrossCorrelation.CorrelationCoefficient(actualZscores, targetZscores);

    //###########################################################################################
    // PROCESS SCORES
    double score = (relativePeakScore * 0.4) + (peaksScore * 0.6);
    return new[] { score, relativePeakScore, peaksScore };
}
/// <summary> /// Apply feature learning process on a set of target (1-minute) recordings (inputPath) /// according to the a set of centroids learned using feature learning process. /// Output feature vectors (outputPath). /// </summary> public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List <double[][]> allCentroids, string inputPath, string outputPath) { var simVecDir = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors")); int frameSize = config.FrameSize; int finalBinCount = config.FinalBinCount; FreqScaleType scaleType = config.FrequencyScaleType; var settings = new SpectrogramSettings() { WindowSize = frameSize, // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second // The "WindowOverlap" is calculated to answer this question // each 24 single-frames duration is equal to 1 second // note that the "WindowOverlap" value should be recalculated if frame size is changed // this has not yet been considered in the Config file! WindowOverlap = 0.10725204, DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, NoiseReductionType = NoiseReductionType.None, NoiseReductionParameter = 0.0, }; double frameStep = frameSize * (1 - settings.WindowOverlap); int minFreqBin = config.MinFreqBin; int maxFreqBin = config.MaxFreqBin; int numFreqBand = config.NumFreqBand; int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand; int patchHeight = config.PatchHeight; // the number of frames that their feature vectors will be concatenated in order to preserve temporal information. 
int frameWindowLength = config.FrameWindowLength; // the step size to make a window of frames int stepSize = config.StepSize; // the factor of downsampling int maxPoolingFactor = config.MaxPoolingFactor; // check whether there is any file in the folder/subfolders if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0) { throw new ArgumentException("The folder of recordings is empty..."); } //***** // lists of features for all processing files // the key is the file name, and the value is the features for different bands Dictionary <string, List <double[, ]> > allFilesMinFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesMeanFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesMaxFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesStdFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesSkewnessFeatureVectors = new Dictionary <string, List <double[, ]> >(); double[,] inputMatrix; List <AudioRecording> recordings = new List <AudioRecording>(); foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) { FileInfo fileInfo = filePath.ToFileInfo(); // process the wav file if it is not empty if (fileInfo.Length != 0) { var recording = new AudioRecording(filePath); settings.SourceFileName = recording.BaseName; if (config.DoSegmentation) { recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep); } else { recordings.Add(recording); } for (int s = 0; s < recordings.Count; s++) { string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv"); var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[s].WavReader); var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram); // DO RMS 
NORMALIZATION //sonogram.Data = SNR.RmsNormalization(sonogram.Data); // DO NOISE REDUCTION if (config.DoNoiseReduction) { decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data); } // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins if (minFreqBin != 1 || maxFreqBin != finalBinCount) { inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin); } else { inputMatrix = decibelSpectrogram.Data; } // creating matrices from different freq bands of the source spectrogram List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); double[][,] matrices2 = allSubmatrices2.ToArray(); List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>(); for (int i = 0; i < matrices2.GetLength(0); i++) { // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor); int rows = downsampledMatrix.GetLength(0); int columns = downsampledMatrix.GetLength(1); var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential); allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix()); } // +++++++++++++++++++++++++++++++++++Feature Transformation // to do the feature transformation, we normalize centroids and // sequential patches from the input spectrogram to unit length // Then, we calculate the dot product of each patch with the centroids' matrix List <double[][]> allNormCentroids = new List <double[][]>(); for (int i = 0; i < allCentroids.Count; i++) { // double check the index of the list double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++) { normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]); } 
allNormCentroids.Add(normCentroids); } List <double[][]> allFeatureTransVectors = new List <double[][]>(); // processing the sequential patch matrix for each band for (int i = 0; i < allSequentialPatchMatrix.Count; i++) { List <double[]> featureTransVectors = new List <double[]>(); double[][] similarityVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++) { // normalize each patch to unit length var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j]; var normVector = inputVector; // to avoid vectors with NaN values, only normalize those that their norm is not equal to zero. if (inputVector.Euclidean() != 0) { normVector = ART_2A.NormaliseVector(inputVector); } similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector); } Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix()); // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames // using FrameWindowLength // patchId refers to the patch id that has been processed so far according to the step size. // if we want no overlap between different frame windows, then stepSize = frameWindowLength int patchId = 0; while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0)) { List <double[]> patchGroup = new List <double[]>(); for (int k = 0; k < frameWindowLength; k++) { patchGroup.Add(similarityVectors[k + patchId]); } featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup)); patchId = patchId + stepSize; } allFeatureTransVectors.Add(featureTransVectors.ToArray()); } // +++++++++++++++++++++++++++++++++++Feature Transformation // +++++++++++++++++++++++++++++++++++Temporal Summarization // Based on the resolution to generate features, the "numFrames" parameter will be set. 
// Each 24 single-frame patches form 1 second // for each 24 patch, we generate 5 vectors of min, mean, std, and max (plus skewness from Accord.net) // The pre-assumption is that each input recording is 1 minute long // store features of different bands in lists List <double[, ]> allMinFeatureVectors = new List <double[, ]>(); List <double[, ]> allMeanFeatureVectors = new List <double[, ]>(); List <double[, ]> allMaxFeatureVectors = new List <double[, ]>(); List <double[, ]> allStdFeatureVectors = new List <double[, ]>(); List <double[, ]> allSkewnessFeatureVectors = new List <double[, ]>(); // Each 24 frames form 1 second using WindowOverlap // factors such as stepSize, and maxPoolingFactor should be considered in temporal summarization. int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor); foreach (var freqBandFeature in allFeatureTransVectors) { List <double[]> minFeatureVectors = new List <double[]>(); List <double[]> meanFeatureVectors = new List <double[]>(); List <double[]> maxFeatureVectors = new List <double[]>(); List <double[]> stdFeatureVectors = new List <double[]>(); List <double[]> skewnessFeatureVectors = new List <double[]>(); int c = 0; while (c + numFrames <= freqBandFeature.GetLength(0)) { // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.) 
List <double[]> sequencesOfFramesList = new List <double[]>(); for (int i = c; i < c + numFrames; i++) { sequencesOfFramesList.Add(freqBandFeature[i]); } List <double> min = new List <double>(); List <double> mean = new List <double>(); List <double> std = new List <double>(); List <double> max = new List <double>(); List <double> skewness = new List <double>(); double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) { double[] temp = new double[sequencesOfFrames.GetLength(0)]; for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) { temp[k] = sequencesOfFrames[k, j]; } min.Add(temp.GetMinValue()); mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); std.Add(AutoAndCrossCorrelation.GetStdev(temp)); max.Add(temp.GetMaxValue()); skewness.Add(temp.Skewness()); } minFeatureVectors.Add(min.ToArray()); meanFeatureVectors.Add(mean.ToArray()); maxFeatureVectors.Add(max.ToArray()); stdFeatureVectors.Add(std.ToArray()); skewnessFeatureVectors.Add(skewness.ToArray()); c += numFrames; } // when (freqBandFeature.GetLength(0) % numFrames) != 0, it means there are a number of frames (< numFrames) // (or the whole) at the end of the target recording , left unprocessed. // this would be problematic when an the resolution to generate the feature vector is 1 min, // but the the length of the target recording is a bit less than one min. 
if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1) { // First, make a list of patches that would be less than the required resolution List <double[]> sequencesOfFramesList = new List <double[]>(); int unprocessedFrames = freqBandFeature.GetLength(0) % numFrames; for (int i = freqBandFeature.GetLength(0) - unprocessedFrames; i < freqBandFeature.GetLength(0); i++) { sequencesOfFramesList.Add(freqBandFeature[i]); } List <double> min = new List <double>(); List <double> mean = new List <double>(); List <double> std = new List <double>(); List <double> max = new List <double>(); List <double> skewness = new List <double>(); double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) { double[] temp = new double[sequencesOfFrames.GetLength(0)]; for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) { temp[k] = sequencesOfFrames[k, j]; } min.Add(temp.GetMinValue()); mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); std.Add(AutoAndCrossCorrelation.GetStdev(temp)); max.Add(temp.GetMaxValue()); skewness.Add(temp.Skewness()); } minFeatureVectors.Add(min.ToArray()); meanFeatureVectors.Add(mean.ToArray()); maxFeatureVectors.Add(max.ToArray()); stdFeatureVectors.Add(std.ToArray()); skewnessFeatureVectors.Add(skewness.ToArray()); } allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix()); allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix()); allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix()); allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix()); allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix()); } //***** // the keys of the following dictionaries contain file name // and their values are a list<double[,]> which the list.count is // the number of all subsegments for which features are extracted 
// the number of freq bands defined as an user-defined parameter. // the 2D-array is the feature vectors. allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors); allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors); allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors); allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors); allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors); // +++++++++++++++++++++++++++++++++++Temporal Summarization } } } // ++++++++++++++++++++++++++++++++++Writing features to one file // First, concatenate mean, max, std for each second. // Then, write the features of each pre-defined frequency band into a separate CSV file. var filesName = allFilesMeanFeatureVectors.Keys.ToArray(); var minFeatures = allFilesMinFeatureVectors.Values.ToArray(); var meanFeatures = allFilesMeanFeatureVectors.Values.ToArray(); var maxFeatures = allFilesMaxFeatureVectors.Values.ToArray(); var stdFeatures = allFilesStdFeatureVectors.Values.ToArray(); var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray(); // The number of elements in the list shows the number of freq bands // the size of each element in the list shows the number of files processed to generate feature for. 
// the dimensions of the matrix shows the number of feature vectors generated for each file and the length of feature vector var allMins = new List <double[][, ]>(); var allMeans = new List <double[][, ]>(); var allMaxs = new List <double[][, ]>(); var allStds = new List <double[][, ]>(); var allSkewness = new List <double[][, ]>(); // looping over freq bands for (int i = 0; i < meanFeatures[0].Count; i++) { var mins = new List <double[, ]>(); var means = new List <double[, ]>(); var maxs = new List <double[, ]>(); var stds = new List <double[, ]>(); var skewnesses = new List <double[, ]>(); // looping over all files for (int k = 0; k < meanFeatures.Length; k++) { mins.Add(minFeatures[k].ToArray()[i]); means.Add(meanFeatures[k].ToArray()[i]); maxs.Add(maxFeatures[k].ToArray()[i]); stds.Add(stdFeatures[k].ToArray()[i]); skewnesses.Add(skewnessFeatures[k].ToArray()[i]); } allMins.Add(mins.ToArray()); allMeans.Add(means.ToArray()); allMaxs.Add(maxs.ToArray()); allStds.Add(stds.ToArray()); allSkewness.Add(skewnesses.ToArray()); } // each element of meanFeatures array is a list of features for different frequency bands. 
// looping over the number of freq bands for (int i = 0; i < allMeans.ToArray().GetLength(0); i++) { // creating output feature file based on the number of freq bands var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv"); // creating the header for CSV file List <string> header = new List <string>(); header.Add("file name"); for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++) { header.Add("min" + j.ToString()); } for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++) { header.Add("mean" + j.ToString()); } for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++) { header.Add("max" + j.ToString()); } for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++) { header.Add("std" + j.ToString()); } for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++) { header.Add("skewness" + j.ToString()); } var csv = new StringBuilder(); string content = string.Empty; foreach (var entry in header.ToArray()) { content += entry.ToString() + ","; } csv.AppendLine(content); var allFilesFeatureVectors = new Dictionary <string, double[, ]>(); // looping over files for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++) { // concatenating mean, std, and max vector together for the pre-defined resolution List <double[]> featureVectors = new List <double[]>(); for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++) { List <double[]> featureList = new List <double[]> { allMins.ToArray()[i][j].ToJagged()[k], allMeans.ToArray()[i][j].ToJagged()[k], allMaxs.ToArray()[i][j].ToJagged()[k], allStds.ToArray()[i][j].ToJagged()[k], allSkewness.ToArray()[i][j].ToJagged()[k], }; double[] featureVector = DataTools.ConcatenateVectors(featureList); featureVectors.Add(featureVector); } allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix()); } // writing feature vectors to CSV file foreach (var entry in allFilesFeatureVectors) { content = string.Empty; content += entry.Key.ToString() 
+ ","; foreach (var cent in entry.Value) { content += cent.ToString() + ","; } csv.AppendLine(content); } File.WriteAllText(outputFeatureFile, csv.ToString()); } }
//public const string key_COUNT = "count";

/// <summary>
/// Scans a matrix band-by-band (bands of columns) and scores each band for periodicity along its rows.
/// For each band the columns are averaged into one value per row, the resulting array is noise reduced and
/// then passed to <c>AutoAndCrossCorrelation.DetectPeriodicityInLongArray</c>.
/// </summary>
/// <param name="m">The data matrix to scan.</param>
/// <param name="rowStep">Number of rows covered by one periodicity estimate; also the height of a marked hit cell.</param>
/// <param name="rowWidth">Passed through to the periodicity detector (NOTE(review): presumably the sample length in rows - confirm).</param>
/// <param name="colStep">Number of columns between the start of successive bands.</param>
/// <param name="colWidth">Number of columns averaged in each band.</param>
/// <param name="intensityThreshold">A cell is marked in the hits matrix only when its intensity exceeds this value.</param>
/// <param name="zeroBinCount">Passed through to the periodicity detector.</param>
/// <returns>
/// Item1: intensity per (row step, column step).
/// Item2: periodicity per (row step, column step).
/// Item3: hits matrix with the same dimensions as <paramref name="m"/>; cells above threshold hold their periodicity value.
/// Item4: the amplitude array of the third band (b == 2), returned for debugging only; null when there are fewer than three bands.
/// </returns>
public static Tuple<double[,], double[,], double[,], double[]> DetectBarsUsingXcorrelation(double[,] m, int rowStep, int rowWidth, int colStep, int colWidth, double intensityThreshold, int zeroBinCount)
{
    bool doNoiseremoval = true;

    //intensityThreshold = 0.3;
    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    int numberOfColSteps = colCount / colStep;
    int numberOfRowSteps = rowCount / rowStep;

    var intensityMatrix = new double[numberOfRowSteps, numberOfColSteps];
    var periodicityMatrix = new double[numberOfRowSteps, numberOfColSteps];
    var hitsMatrix = new double[rowCount, colCount];
    double[] array2return = null;

    for (int b = 0; b < numberOfColSteps; b++)
    {
        // minCol and maxCol are both INCLUSIVE column indices of the current band.
        int minCol = b * colStep;
        int maxCol = minCol + colWidth - 1;

        double[,] subMatrix = MatrixTools.Submatrix(m, 0, minCol, rowCount - 1, maxCol);
        double[] amplitudeArray = MatrixTools.GetRowAverages(subMatrix);

        if (doNoiseremoval)
        {
            // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            double standardDeviationCount = 0.1;
            SNR.BackgroundNoise bgn = SNR.SubtractBackgroundNoiseFromSignal(amplitudeArray, standardDeviationCount);
            amplitudeArray = bgn.NoiseReducedSignal;
        }

        //double noiseThreshold = 0.005;
        //for (int i = 1; i < amplitudeArray.Length - 1; i++)
        //{
        //    if ((amplitudeArray[i - 1] < noiseThreshold) && (amplitudeArray[i + 1] < noiseThreshold)) amplitudeArray[i] = 0.0;
        //}
        //DataTools.writeBarGraph(amplitudeArray);

        if (b == 2)
        {
            array2return = amplitudeArray; //returned for debugging purposes only
        }

        //ii: DETECT HARMONICS
        var results = AutoAndCrossCorrelation.DetectPeriodicityInLongArray(amplitudeArray, rowStep, rowWidth, zeroBinCount);
        double[] intensity = results.Item1; //an array of periodicity scores
        double[] periodicity = results.Item2;

        // Last column to mark for this band, clamped so an over-wide band cannot index outside the hits matrix.
        int lastHitCol = Math.Min(maxCol, colCount - 1);

        //transfer periodicity info to the matrices.
        for (int rs = 0; rs < numberOfRowSteps; rs++)
        {
            intensityMatrix[rs, b] = intensity[rs];
            periodicityMatrix[rs, b] = periodicity[rs];

            if (intensity[rs] <= intensityThreshold)
            {
                continue;
            }

            //mark up the hits matrix
            //double relativePeriod = periodicity[rs] / rowWidth / 2;
            int minRow = rs * rowStep;
            int lastHitRow = Math.Min(minRow + rowStep - 1, rowCount - 1);

            // Both bounds are inclusive: the previous "<" comparisons left the last row and the last column
            // of every hit cell unmarked.
            for (int r = minRow; r <= lastHitRow; r++)
            {
                for (int c = minCol; c <= lastHitCol; c++)
                {
                    //hitsMatrix[r, c] = relativePeriod;
                    hitsMatrix[r, c] = periodicity[rs];
                }
            }
        } // for loop over numberOfRowSteps
    } // for loop over numberOfColSteps

    return Tuple.Create(intensityMatrix, periodicityMatrix, hitsMatrix, array2return);
}
} //DetectBarsInTheRowsOfaMatrix()

/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the sonogram.
/// Was first developed for crow calls.
/// First looks for a decibel profile that matches the passed call duration and decibel loudness:
/// a row must be a local peak, exceed the threshold, and stand out from the rows half a call span either side.
/// Then cross-correlates each such row with the preceding row to estimate the harmonic period.
/// </summary>
/// <param name="m">The (rotated) spectrogram data; one spectrum per row.</param>
/// <param name="dBThreshold">Minimum value of the noise-reduced row average, and minimum drop to the rows half a span away, for a row to be analysed.</param>
/// <param name="callSpan">Expected call duration in rows; half of it is used as the distance for the profile test.</param>
/// <returns>
/// Item1: the smoothed, noise-reduced row averages.
/// Item2: intensity of the strongest cross-correlation bin for each analysed row (0 elsewhere).
/// Item3: the period derived from that bin for each analysed row (0 elsewhere).
/// </returns>
public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
{
    int zeroBinCount = 3; //to remove low freq content which dominates the spectrum
    int halfspan = callSpan / 2;

    double[] dBArray = MatrixTools.GetRowAverages(m);
    dBArray = DataTools.filterMovingAverage(dBArray, 3);

    bool doNoiseRemoval = true;
    if (doNoiseRemoval)
    {
        // number of noise SDs to calculate noise threshold - determines severity of noise reduction
        double standardDeviationCount = 0.1;
        SNR.BackgroundNoise bgn = SNR.SubtractBackgroundNoiseFromSignal(dBArray, standardDeviationCount);
        dBArray = bgn.NoiseReducedSignal;
    }

    bool[] peaks = DataTools.GetPeaks(dBArray);

    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    var intensity = new double[rowCount]; //an array of period intensity
    var periodicity = new double[rowCount]; //an array of the periodicity values

    // Each row is correlated with the row before it, so row 0 can never be analysed.
    // Starting at 1 (instead of halfspan == 0) avoids reading row -1 when callSpan < 2.
    int firstRow = Math.Max(halfspan, 1);

    for (int r = firstRow; r < rowCount - halfspan; r++)
    {
        //APPLY A FILTER: must satisfy the following conditions for a call.
        if (!peaks[r])
        {
            continue;
        }

        if (dBArray[r] < dBThreshold)
        {
            continue;
        }

        double lowerDiff = dBArray[r] - dBArray[r - halfspan];
        double upperDiff = dBArray[r] - dBArray[r + halfspan];
        if (lowerDiff < dBThreshold || upperDiff < dBThreshold)
        {
            continue;
        }

        double[] prevRow = DataTools.DiffFromMean(MatrixTools.GetRow(m, r - 1));
        double[] thisRow = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
        var spectrum = AutoAndCrossCorrelation.CrossCorr(prevRow, thisRow);

        for (int s = 0; s < zeroBinCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);
        intensity[r] = spectrum[maxId];

        // A max index of zero carries no period information; leave the period at zero rather than divide by zero.
        periodicity[r] = maxId != 0 ? 2 * colCount / (double)maxId : 0.0;
    } // rows

    return Tuple.Create(dBArray, intensity, periodicity);
} //DetectHarmonicsInSonogramMatrix()
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Builds a spectrogram of the recording, averages a lower and an upper frequency band into one value per frame,
/// cross-correlates the two band profiles in windows of 64 frames, and keeps the windows whose dominant period
/// falls inside the configured range. The resulting score track is smoothed and converted to acoustic events.
/// </summary>
/// <param name="recording">The recording to analyse; when null a message is logged and null is returned.</param>
/// <param name="sonoConfig">Spectrogram configuration; its window size also determines the frequency bin width used here.</param>
/// <param name="lrConfig">Band limits, period and duration limits, and thresholds.</param>
/// <param name="returnDebugImage">When true a debug image with score plots is drawn and returned.</param>
/// <param name="segmentStartOffset">Passed on to event conversion and event cropping.</param>
/// <returns>
/// The spectrogram, a hits matrix with the spectrogram's dimensions (allocated here and never written by this method),
/// the smoothed intensity scores, the predicted events, and the debug image (null unless requested).
/// </returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LewinsRailConfig lrConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    // Read all configuration values before any work is done.
    int sampleRate = recording.SampleRate;
    int upperMinHz = lrConfig.UpperBandMinHz;
    int upperMaxHz = lrConfig.UpperBandMaxHz;
    int lowerMinHz = lrConfig.LowerBandMinHz;
    int lowerMaxHz = lrConfig.LowerBandMaxHz;
    double scoreThreshold = lrConfig.EventThreshold; //in 0-1
    double shortestEvent = lrConfig.MinDuration; // seconds
    double longestEvent = lrConfig.MaxDuration; // seconds
    double shortestPeriod = lrConfig.MinPeriod; // seconds
    double longestPeriod = lrConfig.MaxPeriod; // seconds

    double hzPerBin = sampleRate / (double)sonoConfig.WindowSize;

    // The frame rate is taken to be equal to the bin width.
    double framesPerSecond = hzPerBin;

    // Converts a frequency to a (one-based) bin index.
    // The Xcorrelation-FFT technique requires the number of bins to scan to be a power of 2.
    // Assuming sr=17640 and window=1024, 64 bins span 1100 Hz above the min Hz level, i.e. 500 to 1600;
    // 128 bins span 2200 Hz above the min Hz level, i.e. 500 to 2700.
    int ToBin(int hertz) => (int)Math.Round(hertz / hzPerBin) + 1;

    int lowerFirstBin = ToBin(lowerMinHz);
    int lowerLastBin = ToBin(lowerMaxHz);
    int upperFirstBin = ToBin(upperMinHz);
    int upperLastBin = ToBin(upperMaxHz);

    //i: MAKE SONOGRAM
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int frameCount = sonogram.Data.GetLength(0);
    int binCount = sonogram.Data.GetLength(1);

    // One average value per frame for each of the two bands.
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerFirstBin, frameCount - 1, lowerLastBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperFirstBin, frameCount - 1, upperLastBin);

    int stride = (int)Math.Round(framesPerSecond); //take one second steps
    int windowCount = frameCount / stride;
    int windowLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
    int leadingBinsToZero = 3;

    double[] intensity = new double[frameCount];
    double[] periodicity = new double[frameCount];

    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int w = 0, offset = 0; w < windowCount; w++, offset += stride)
    {
        double[] lowerWindow = DataTools.Subarray(lowerArray, offset, windowLength);
        double[] upperWindow = DataTools.Subarray(upperArray, offset, windowLength);

        // Stop at the first window that is not full length.
        bool bothFullLength = lowerWindow.Length == windowLength && upperWindow.Length == windowLength;
        if (!bothFullLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerWindow, upperWindow);

        //in real data these bins are dominant and hide other frequency content
        for (int bin = 0; bin < leadingBinsToZero; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);

        //convert the peak bin to a period in seconds
        double period = 2 * windowLength / (double)peakBin / framesPerSecond;
        bool periodOutOfRange = period < shortestPeriod || period > longestPeriod;
        if (periodOutOfRange)
        {
            continue;
        }

        // lay down score for sample length: keep the strongest intensity seen so far, always overwrite the period
        double peakValue = spectrum[peakBin];
        for (int frame = offset; frame < offset + windowLength; frame++)
        {
            if (peakValue > intensity[frame])
            {
                intensity[frame] = peakValue;
            }

            periodicity[frame] = period;
        }
    }

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerMinHz,
        upperMaxHz,
        sonogram.FramesPerSecond,
        hzPerBin,
        scoreThreshold,
        shortestEvent,
        longestEvent,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray, segmentStartOffset);

    var hits = new double[frameCount, binCount];
    var scorePlot = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);

    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out var scaledScores, out var scaledThreshold);
        var intensityPlot = new Plot("Intensity", scaledScores, scaledThreshold);

        DataTools.Normalise(periodicity, 10, out scaledScores, out scaledThreshold);
        var periodicityPlot = new Plot("Periodicity", scaledScores, scaledThreshold);

        var debugPlots = new List<Plot> { scorePlot, intensityPlot, periodicityPlot };
        debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
    }

    return Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage);
} //Analysis()
/// <summary>
/// Estimates the periodicity of a spectral track by cross-correlating the track's frequency bin with the bin
/// above it, at each frame position in the middle third of the track.
/// The results are stored in <c>track.periodicityScore</c> and <c>track.periodicity</c>.
/// </summary>
/// <param name="track">The track to analyse; its AverageBin, Length and StartFrame are read and its periodicity fields are overwritten.</param>
/// <param name="xCorrelationLength">Number of frames in each cross-correlation sample.</param>
/// <param name="listOfSpectralBins">One array of values per frequency bin, indexed by frame.</param>
/// <param name="framesPerSecond">Frame rate, used to convert a correlation bin index to a period in seconds.</param>
public static void DetectTrackPeriodicity(SpectralTrack track, int xCorrelationLength, List<double[]> listOfSpectralBins, double framesPerSecond)
{
    int halfSample = xCorrelationLength / 2;
    int lowerBin = (int)Math.Round(track.AverageBin);

    // Use the bin above, unless the track already sits in the top bin.
    int upperBin = Math.Min(lowerBin + 1, listOfSpectralBins.Count - 1);
    int length = track.Length;

    //only sample the middle third of track
    int start = length / 3;
    int end = start + start - 1;

    //init score track and periodicity track
    double[] score = new double[start];
    double[] period = new double[start];

    // Maximum period of interest in seconds. Correlation bins at or below minInterestingID correspond to
    // periods at least this long and are suppressed below.
    // NOTE(review): an older comment here spoke of a minimum oscillation rate of 8 per second (~125 ms);
    // the value actually used is 0.05 s - confirm which is intended.
    const double maxPeriod = 0.05;
    int minInterestingID = (int)Math.Round(2 * xCorrelationLength / maxPeriod / framesPerSecond);

    for (int r = start; r < end; r++) // for each position in centre third of track
    {
        // Centre the sample on the current position, but never start before the first frame.
        int sampleStart = Math.Max(track.StartFrame - halfSample + r, 0);

        double[] lowerSubarray = DataTools.Subarray(listOfSpectralBins[lowerBin], sampleStart, xCorrelationLength);
        double[] upperSubarray = DataTools.Subarray(listOfSpectralBins[upperBin], sampleStart, xCorrelationLength);

        if (lowerSubarray == null || upperSubarray == null)
        {
            break; //reached end of array
        }

        if (lowerSubarray.Length != xCorrelationLength || upperSubarray.Length != xCorrelationLength)
        {
            break; //reached end of array
        }

        lowerSubarray = DataTools.SubtractMean(lowerSubarray); // zero mean the arrays
        upperSubarray = DataTools.SubtractMean(upperSubarray);

        var xCorSpectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);

        // In real data the low freq/long period bins are dominant and hide other frequency content.
        // Clamp the upper bound so a large minInterestingID cannot run past the end of the spectrum.
        int lastZeroedBin = Math.Min(minInterestingID, xCorSpectrum.Length - 1);
        for (int s = 0; s <= lastZeroedBin; s++)
        {
            xCorSpectrum[s] = 0.0;
        }

        int maxIdXcor = DataTools.GetMaxIndex(xCorSpectrum);

        // Convert the max index to a period in seconds. An index of zero carries no period information,
        // so record zero (as DetectHarmonicsInSonogramMatrix does) instead of dividing by zero.
        period[r - start] = maxIdXcor != 0
            ? 2 * xCorrelationLength / (double)maxIdXcor / framesPerSecond
            : 0.0;
        score[r - start] = xCorSpectrum[maxIdXcor];
    } // for loop

    track.periodicityScore = score;
    track.periodicity = period;

    //if (track.score.Average() < 0.3) track = null;
}
public void TestFeatureLearning() { // var outputDir = this.outputDirectory; var resultDir = PathHelper.ResolveAssetPath("FeatureLearning"); var folderPath = Path.Combine(resultDir, "random_audio_segments"); // Liz // PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening\random_audio_segments\1192_1000"); // var resultDir = PathHelper.ResolveAssetPath(@"C:\Users\kholghim\Mahnoosh\PcaWhitening"); var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png"); var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png"); var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png"); var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png"); // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp"); // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1000 random 1-min recordings from Gympie // check whether there is any file in the folder/subfolders if (Directory.GetFiles(folderPath, "*", SearchOption.AllDirectories).Length == 0) { throw new ArgumentException("The folder of recordings is empty..."); } // get the nyquist value from the first wav file in the folder of recordings int nq = new AudioRecording(Directory.GetFiles(folderPath, "*.wav")[0]).Nyquist; int nyquist = nq; // 11025; int frameSize = 1024; int finalBinCount = 128; // 256; // 100; // 40; // 200; // int hertzInterval = 1000; FreqScaleType scaleType = FreqScaleType.Mel; var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval); var fst = freqScale.ScaleType; var sonoConfig = new SonogramConfig { WindowSize = frameSize, // since each 24 frames duration is equal to 1 second WindowOverlap = 0.1028, DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, MelBinCount = (scaleType == FreqScaleType.Mel) ? 
finalBinCount : frameSize / 2, NoiseReductionType = NoiseReductionType.None, }; /* * // testing * var recordingPath3 = PathHelper.ResolveAsset(folderPath, "SM304264_0+1_20160421_024539_46-47min.wav"); * var recording3 = new AudioRecording(recordingPath3); * var sonogram3 = new SpectrogramStandard(sonoConfig, recording3.WavReader); * * // DO DRAW SPECTROGRAM * var image4 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); * image4.Save(outputMelImagePath); * * // Do RMS normalization * sonogram3.Data = SNR.RmsNormalization(sonogram3.Data); * var image5 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); * image5.Save(outputNormMelImagePath); * * // NOISE REDUCTION * sonogram3.Data = PcaWhitening.NoiseReduction(sonogram3.Data); * var image6 = sonogram3.GetImageFullyAnnotated(sonogram3.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); * image6.Save(outputNoiseReducedMelImagePath); * * //testing */ // Define the minFreBin and MaxFreqBin to be able to work at arbitrary frequency bin bounds. // The default value is minFreqBin = 1 and maxFreqBin = finalBinCount. // To work with arbitrary frequency bin bounds we need to manually set these two parameters. 
int minFreqBin = 40; //1 int maxFreqBin = 80; //finalBinCount; int numFreqBand = 1; //4; int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand; // finalBinCount / numFreqBand; int patchHeight = 1; // 2; // 4; // 16; // 6; // Frame size int numRandomPatches = 20; // 40; // 80; // 30; // 100; // 500; // // int fileCount = Directory.GetFiles(folderPath, "*.wav").Length; // Define variable number of "randomPatch" lists based on "numFreqBand" Dictionary <string, List <double[, ]> > randomPatchLists = new Dictionary <string, List <double[, ]> >(); for (int i = 0; i < numFreqBand; i++) { randomPatchLists.Add(string.Format("randomPatch{0}", i.ToString()), new List <double[, ]>()); } List <double[, ]> randomPatches = new List <double[, ]>(); /* * foreach (string filePath in Directory.GetFiles(folderPath, "*.wav")) * { * FileInfo f = filePath.ToFileInfo(); * if (f.Length == 0) * { * Debug.WriteLine(f.Name); * } * } */ double[,] inputMatrix; foreach (string filePath in Directory.GetFiles(folderPath, "*.wav")) { FileInfo fileInfo = filePath.ToFileInfo(); // process the wav file if it is not empty if (fileInfo.Length != 0) { var recording = new AudioRecording(filePath); sonoConfig.SourceFName = recording.BaseName; var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); // DO RMS NORMALIZATION sonogram.Data = SNR.RmsNormalization(sonogram.Data); // DO NOISE REDUCTION // sonogram.Data = SNR.NoiseReduce_Median(sonogram.Data, nhBackgroundThreshold: 2.0); sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data); // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins if (minFreqBin != 1 || maxFreqBin != finalBinCount) { inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram.Data, minFreqBin, maxFreqBin); } else { inputMatrix = sonogram.Data; } // creating matrices from different freq bands of the source spectrogram List <double[, ]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); // 
Second: selecting random patches from each freq band matrix and add them to the corresponding patch list int count = 0; while (count < allSubmatrices.Count) { randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches, PatchSampling.SamplingMethod.Random).ToMatrix()); count++; } } } foreach (string key in randomPatchLists.Keys) { randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key])); } // convert list of random patches matrices to one matrix int numberOfClusters = 50; //256; // 128; // 64; // 32; // 10; // List <double[][]> allBandsCentroids = new List <double[][]>(); List <KMeansClusterCollection> allClusteringOutput = new List <KMeansClusterCollection>(); for (int i = 0; i < randomPatches.Count; i++) { double[,] patchMatrix = randomPatches[i]; // Apply PCA Whitening var whitenedSpectrogram = PcaWhitening.Whitening(true, patchMatrix); // Do k-means clustering var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters); // var clusteringOutput = KmeansClustering.Clustering(patchMatrix, noOfClusters, pathToClusterCsvFile); // writing centroids to a csv file // note that Csv.WriteToCsv can't write data types like dictionary<int, double[]> (problems with arrays) // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv // it might be a better way to do this string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv"); var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray(); Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix()); //Csv.WriteToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids); // sorting clusters based on size and output it to a csv file Dictionary <int, double> clusterIdSize = clusteringOutput.ClusterIdSize; int[] sortOrder = 
KmeansClustering.SortClustersBasedOnSize(clusterIdSize); // Write cluster ID and size to a CSV file string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv"); Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize); // Draw cluster image directly from clustering output List <KeyValuePair <int, double[]> > list = clusteringOutput.ClusterIdCentroid.ToList(); double[][] centroids = new double[list.Count][]; for (int j = 0; j < list.Count; j++) { centroids[j] = list[j].Value; } allBandsCentroids.Add(centroids); allClusteringOutput.Add(clusteringOutput.Clusters); List <double[, ]> allCentroids = new List <double[, ]>(); for (int k = 0; k < centroids.Length; k++) { // convert each centroid to a matrix in order of cluster ID // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight); // OR: in order of cluster size double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight); // normalize each centroid double[,] normCent = DataTools.normalise(cent); // add a row of zero to each centroid double[,] cent2 = PatchSampling.AddRow(normCent); allCentroids.Add(cent2); } // concatenate all centroids double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids); // Draw clusters // int gridInterval = 1000; // var freqScale = new FrequencyScale(FreqScaleType.Mel, nyquist, frameSize, finalBinCount, gridInterval); var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix); clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone); // clusterImage.Save(outputClusterImagePath, ImageFormat.Bmp); var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp"); // Image bmp = Image.Load<Rgb24>(filename); FrequencyScale.DrawFrequencyLinesOnImage((Image <Rgb24>)clusterImage, freqScale, includeLabels: false); clusterImage.Save(outputClusteringImage); } 
//+++++++++++++++++++++++++++++++++++++++++++++++++++++Processing and generating features for the target recordings var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"); // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353972_20160303_055854_60_0.wav"); // folder with 1000 files // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_353887_20151230_042625_60_0.wav"); // folder with 1000 files // var recording2Path = PathHelper.ResolveAsset(folderPath, "gympie_np_1192_354744_20151018_053923_60_0.wav"); // folder with 100 files var recording2 = new AudioRecording(recording2Path); var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader); // DO DRAW SPECTROGRAM var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); image.Save(outputMelImagePath); // Do RMS normalization sonogram2.Data = SNR.RmsNormalization(sonogram2.Data); var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); image2.Save(outputNormMelImagePath); // NOISE REDUCTION sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data); var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); image3.Save(outputNoiseReducedMelImagePath); // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins if (minFreqBin != 1 || maxFreqBin != finalBinCount) { inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram2.Data, minFreqBin, maxFreqBin); } else { inputMatrix = sonogram2.Data; } // extracting sequential patches from the target spectrogram List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); double[][,] matrices2 = allSubmatrices2.ToArray(); List <double[, ]> allSequentialPatchMatrix = 
new List <double[, ]>(); for (int i = 0; i < matrices2.GetLength(0); i++) { int rows = matrices2[i].GetLength(0); int columns = matrices2[i].GetLength(1); var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential); allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix()); } // +++++++++++++++++++++++++++++++++++Feature Transformation // to do the feature transformation, we normalize centroids and // sequential patches from the input spectrogram to unit length // Then, we calculate the dot product of each patch with the centroids' matrix List <double[][]> allNormCentroids = new List <double[][]>(); for (int i = 0; i < allBandsCentroids.Count; i++) { // double check the index of the list double[][] normCentroids = new double[allBandsCentroids.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allBandsCentroids.ToArray()[i].GetLength(0); j++) { normCentroids[j] = ART_2A.NormaliseVector(allBandsCentroids.ToArray()[i][j]); } allNormCentroids.Add(normCentroids); } List <double[][]> allFeatureTransVectors = new List <double[][]>(); for (int i = 0; i < allSequentialPatchMatrix.Count; i++) { double[][] featureTransVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++) { var normVector = ART_2A.NormaliseVector(allSequentialPatchMatrix.ToArray()[i] .ToJagged()[j]); // normalize each patch to unit length featureTransVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector); } allFeatureTransVectors.Add(featureTransVectors); } // +++++++++++++++++++++++++++++++++++Feature Transformation // +++++++++++++++++++++++++++++++++++Temporal Summarization // The resolution to generate features is 1 second // Each 24 single-frame patches form 1 second // for each 24 patch, we generate 3 vectors of mean, std, and max // The pre-assumption is that each input 
spectrogram is 1 minute List <double[, ]> allMeanFeatureVectors = new List <double[, ]>(); List <double[, ]> allMaxFeatureVectors = new List <double[, ]>(); List <double[, ]> allStdFeatureVectors = new List <double[, ]>(); // number of frames needs to be concatenated to form 1 second. Each 24 frames make 1 second. int numFrames = (24 / patchHeight) * 60; foreach (var freqBandFeature in allFeatureTransVectors) { // store features of different bands in lists List <double[]> meanFeatureVectors = new List <double[]>(); List <double[]> maxFeatureVectors = new List <double[]>(); List <double[]> stdFeatureVectors = new List <double[]>(); int c = 0; while (c + numFrames < freqBandFeature.GetLength(0)) { // First, make a list of patches that would be equal to 1 second List <double[]> sequencesOfFramesList = new List <double[]>(); for (int i = c; i < c + numFrames; i++) { sequencesOfFramesList.Add(freqBandFeature[i]); } List <double> mean = new List <double>(); List <double> std = new List <double>(); List <double> max = new List <double>(); double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); // int len = sequencesOfFrames.GetLength(1); // Second, calculate mean, max, and standard deviation of six vectors element-wise for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) { double[] temp = new double[sequencesOfFrames.GetLength(0)]; for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) { temp[k] = sequencesOfFrames[k, j]; } mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); std.Add(AutoAndCrossCorrelation.GetStdev(temp)); max.Add(temp.GetMaxValue()); } meanFeatureVectors.Add(mean.ToArray()); maxFeatureVectors.Add(max.ToArray()); stdFeatureVectors.Add(std.ToArray()); c += numFrames; } allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix()); allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix()); allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix()); } // +++++++++++++++++++++++++++++++++++Temporal Summarization // 
++++++++++++++++++++++++++++++++++Writing features to file // First, concatenate mean, max, std for each second. // Then write to CSV file. for (int j = 0; j < allMeanFeatureVectors.Count; j++) { // write the features of each pre-defined frequency band into a separate CSV file var outputFeatureFile = Path.Combine(resultDir, "FeatureVectors" + j.ToString() + ".csv"); // creating the header for CSV file List <string> header = new List <string>(); for (int i = 0; i < allMeanFeatureVectors.ToArray()[j].GetLength(1); i++) { header.Add("mean" + i.ToString()); } for (int i = 0; i < allMaxFeatureVectors.ToArray()[j].GetLength(1); i++) { header.Add("max" + i.ToString()); } for (int i = 0; i < allStdFeatureVectors.ToArray()[j].GetLength(1); i++) { header.Add("std" + i.ToString()); } // concatenating mean, std, and max vector together for each 1 second List <double[]> featureVectors = new List <double[]>(); for (int i = 0; i < allMeanFeatureVectors.ToArray()[j].ToJagged().GetLength(0); i++) { List <double[]> featureList = new List <double[]> { allMeanFeatureVectors.ToArray()[j].ToJagged()[i], allMaxFeatureVectors.ToArray()[j].ToJagged()[i], allStdFeatureVectors.ToArray()[j].ToJagged()[i], }; double[] featureVector = DataTools.ConcatenateVectors(featureList); featureVectors.Add(featureVector); } // writing feature vectors to CSV file using (StreamWriter file = new StreamWriter(outputFeatureFile)) { // writing the header to CSV file foreach (var entry in header.ToArray()) { file.Write(entry + ","); } file.Write(Environment.NewLine); foreach (var entry in featureVectors.ToArray()) { foreach (var value in entry) { file.Write(value + ","); } file.Write(Environment.NewLine); } } } /* * // Reconstructing the target spectrogram based on clusters' centroids * List<double[,]> convertedSpec = new List<double[,]>(); * int columnPerFreqBand = sonogram2.Data.GetLength(1) / numFreqBand; * for (int i = 0; i < allSequentialPatchMatrix.Count; i++) * { * double[,] reconstructedSpec2 = 
KmeansClustering.ReconstructSpectrogram(allSequentialPatchMatrix.ToArray()[i], allClusteringOutput.ToArray()[i]); * convertedSpec.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, columnPerFreqBand)); * } * * sonogram2.Data = PatchSampling.ConcatFreqBandMatrices(convertedSpec); * * // DO DRAW SPECTROGRAM * var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations); * reconstructedSpecImage.Save(outputReSpecImagePath); */ }
} // end method ConvertODScores2Events()

/*
 * public static double PeakEntropy(double[] array)
 * {
 *     bool[] peaks = DataTools.GetPeaks(array);
 *     int peakCount = DataTools.CountTrues(peaks);
 *     //set up histogram of peak energies
 *     double[] histogram = new double[peakCount];
 *     int count = 0;
 *     for (int k = 0; k < array.Length; k++)
 *     {
 *         if (peaks[k])
 *         {
 *             histogram[count] = array[k];
 *             count++;
 *         }
 *     }
 *     histogram = DataTools.NormaliseMatrixValues(histogram);
 *     histogram = DataTools.Normalise2Probabilites(histogram);
 *     double normFactor = Math.Log(histogram.Length) / DataTools.ln2; //normalize for length of the array
 *     double entropy = DataTools.Entropy(histogram) / normFactor;
 *     return entropy;
 * }
 *
 */

/// <summary>
/// Estimates the periodicity in an array of values.
/// The array is correlated with itself, the mean is subtracted from the result and a DCT is taken.
/// The absolute DCT values are thresholded to local peaks and the three largest remaining values
/// are converted to periods.
/// Side effects: writes a bar graph of the thresholded DCT (DataTools.writeBarGraph) and logs the
/// three selected indices to LoggedConsole.
/// </summary>
/// <param name="array">the values to analyse.</param>
/// <returns>
/// An array of three periods, in the order the indices are returned by successive calls to
/// DataTools.GetMaxIndex. Each period is calculated as 2 * dctLength / index, where dctLength is the
/// length of the correlation output. A period is 0.0 when the selected index is zero.
/// </returns>
public static double[] PeriodicityAnalysis(double[] array)
{
    // number of leading DCT coefficients that are zeroed to remove low freq oscillations from consideration
    const int lowCoefficientCount = 3;

    // normalised DCT values below this level are discarded, even when they are peaks
    const double minPeakValue = 0.2;

    // number of periods that are reported
    const int peakCount = 3;

    //DataTools.writeBarGraph(array);
    // NOTE(review): the original comment here read "do 2/3rds of maximum possible lag" - the lag range is
    // decided inside MyCrossCorrelation and cannot be confirmed from this method.
    var A = AutoAndCrossCorrelation.MyCrossCorrelation(array, array);
    int dctLength = A.Length;

    A = DataTools.SubtractMean(A);

    //DataTools.writeBarGraph(A);

    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
    double[] dct = MFCCStuff.DCT(A, cosines);

    for (int i = 0; i < dctLength; i++)
    {
        dct[i] = Math.Abs(dct[i]); //convert to absolute values
    }

    //DataTools.writeBarGraph(dct);

    // Clamp to the array length so that a very short DCT does not cause an IndexOutOfRangeException.
    int zeroCount = Math.Min(lowCoefficientCount, dct.Length);
    for (int i = 0; i < zeroCount; i++)
    {
        dct[i] = 0.0; //remove low freq oscillations from consideration
    }

    dct = DataTools.normalise2UnitLength(dct);
    var peaks = DataTools.GetPeaks(dct);

    // remove non-peak values and low values
    for (int i = 0; i < dctLength; i++)
    {
        if (!peaks[i] || dct[i] < minPeakValue)
        {
            dct[i] = 0.0;
        }
    }

    DataTools.writeBarGraph(dct);

    //get periodicity of highest three values
    var period = new double[peakCount];
    var maxIndex = new double[peakCount];
    for (int i = 0; i < peakCount; i++)
    {
        int indexOfMaxValue = DataTools.GetMaxIndex(dct);
        maxIndex[i] = indexOfMaxValue;

        //double oscilFreq = indexOfMaxValue / dctDuration * 0.5; //Times 0.5 because index = Pi and not 2Pi
        // index zero has no finite period (it would be a division by zero), so report 0.0 instead
        if (indexOfMaxValue == 0)
        {
            period[i] = 0.0;
        }
        else
        {
            period[i] = dctLength / (double)indexOfMaxValue * 2;
        }

        dct[indexOfMaxValue] = 0.0; // remove value for next iteration
    }

    LoggedConsole.WriteLine("Max indices = {0:f0}, {1:f0}, {2:f0}.", maxIndex[0], maxIndex[1], maxIndex[2]);
    return period;
}
} //Analyze()

/// <summary>
/// ################ THE KEY ANALYSIS METHOD
/// Makes a spectrogram of the audio segment, averages the decibel values in a lower and an upper
/// frequency band for every frame, cross-correlates the two band arrays in windows of 64 frames
/// advanced in one-second steps, and converts the resulting intensity scores to acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">the audio file (segment) to be analysed.</param>
/// <param name="configDict">
/// configuration values. The band limits, decibel threshold, intensity threshold, min/max duration and
/// min/max period keys must all be present. Frame size and window overlap are NOT read from here;
/// they are fixed at 1024 and 0.0.
/// </param>
/// <param name="segmentStartOffset">start offset of the segment; passed through to the event conversion.</param>
/// <returns>
/// A tuple of: (1) the spectrogram, (2) a hits matrix with the dimensions of the spectrogram data which
/// is left filled with zeros, (3) the intensity score for every frame after a moving average filter of
/// width 5, (4) the predicted acoustic events, (5) the duration of the recording.
/// </returns>
/// <exception cref="KeyNotFoundException">a required key is missing from <paramref name="configDict"/>.</exception>
/// <exception cref="FormatException">a configuration value cannot be parsed as a number.</exception>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values - ignore those set by user
    int frameSize = 1024;
    double windowOverlap = 0.0;

    // NOTE(review): these Parse calls use the current thread culture. Confirm whether the config values
    // are expected to be culture-invariant (e.g. "0.5" under a comma-decimal culture).
    int upperBandMinHz = int.Parse(configDict[KeyUpperfreqbandBtm]);
    int upperBandMaxHz = int.Parse(configDict[KeyUpperfreqbandTop]);
    int lowerBandMinHz = int.Parse(configDict[KeyLowerfreqbandBtm]);
    int lowerBandMaxHz = int.Parse(configDict[KeyLowerfreqbandTop]);

    // The decibel threshold is not used below. It is still parsed so that a missing or malformed
    // value continues to be reported in the same way as before.
    double decibelThreshold = double.Parse(configDict[KeyDecibelThreshold]); //dB
    double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold]); //in 0-1
    double minDuration = double.Parse(configDict[KeyMinDuration]); // seconds
    double maxDuration = double.Parse(configDict[KeyMaxDuration]); // seconds
    double minPeriod = double.Parse(configDict[KeyMinPeriod]); // seconds
    double maxPeriod = double.Parse(configDict[KeyMaxPeriod]); // seconds

    // A constructor call never yields null, so no null check is required here.
    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig(); //default values config
    sonoConfig.SourceFName = recording.BaseName;
    sonoConfig.WindowSize = frameSize;
    sonoConfig.WindowOverlap = windowOverlap;

    //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
    sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD");
    TimeSpan recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // with zero window overlap the frame rate (sr / window) has the same value as the bin width
    double framesPerSecond = freqBinWidth;

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    int step = (int)Math.Round(framesPerSecond); //take one second steps
    int stepCount = rowCount / step;
    int sampleLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
    double[] intensity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;

        // Scores are written to intensity[start .. start + sampleLength - 1], so stop as soon as a full
        // window no longer fits. This does not rely on how DataTools.Subarray treats an over-long request.
        if (start + sampleLength > rowCount)
        {
            break;
        }

        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
        if (lowerSubarray == null || upperSubarray == null
            || lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 3;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);

        //convert maxID to period in seconds
        // When maxId is zero the floating point division gives infinity, which the range test below
        // rejects for any finite maxPeriod.
        double period = 2 * sampleLength / (double)maxId / framesPerSecond;
        if ((period < minPeriod) || (period > maxPeriod))
        {
            continue;
        }

        //lay down score for sample length - keep the highest score where windows overlap
        double score = spectrum[maxId];
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < score)
            {
                intensity[start + j] = score;
            }
        }
    }

    //######################################################################
    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerBandMinHz,
        upperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray);

    // the hits matrix is returned empty (all zeros)
    var hits = new double[rowCount, colCount];

    return Tuple.Create(sonogram, hits, intensity, predictedEvents, recordingDuration);
} //Analysis()