/// <summary>
/// Estimates the dominant oscillation in an array from its DCT.
/// The mean is subtracted from the array, the DCT is taken, coefficient magnitudes are kept,
/// the lowest fifth of the coefficients is zeroed and the remainder is normalised to unit length.
/// </summary>
/// <param name="array">the values to analyse.</param>
/// <param name="framesPerSecond">frame rate used to convert the DCT index into seconds.</param>
/// <param name="cosines">cosine matrix passed through to MFCCStuff.DCT.</param>
/// <param name="oscilFreq">oscillation rate derived from the index of the largest coefficient.</param>
/// <param name="period">oscillation period in seconds derived from the same index.</param>
/// <param name="intenisty">normalised value of the largest coefficient.</param>
public static void GetOscillationUsingDct(double[] array, double framesPerSecond, double[,] cosines, out double oscilFreq, out double period, out double intenisty)
{
    var coefficients = MFCCStuff.DCT(DataTools.SubtractMean(array), cosines);

    // Coefficients below this index represent low frequency oscillations and are removed from consideration.
    int lowFreqCutoff = coefficients.Length / 5;
    for (int k = 0; k < coefficients.Length; k++)
    {
        // The sign only reflects phase, so the magnitude is kept for the remaining coefficients.
        coefficients[k] = k < lowFreqCutoff ? 0.0 : Math.Abs(coefficients[k]);
    }

    coefficients = DataTools.normalise2UnitLength(coefficients);
    int peakIndex = DataTools.GetMaxIndex(coefficients);

    // Duration in seconds spanned by the DCT.
    double dctDuration = coefficients.Length / framesPerSecond;

    // Times 0.5 because the index corresponds to Pi and not 2Pi.
    oscilFreq = peakIndex / dctDuration * 0.5;

    // Convert the peak index to a period in seconds.
    period = 2 * coefficients.Length / (double)peakIndex / framesPerSecond;
    intenisty = coefficients[peakIndex];
}
/// <summary>
/// Builds a histogram of the location of the spectral maximum in each frame of a spectrogram.
/// </summary>
/// <param name="spectrogram">matrix whose rows are frames and whose columns are frequency bins.</param>
/// <returns>
/// Item1: count, per frequency bin, of the frames whose maximum falls in that bin.
/// Item2: the bin index of the maximum for each frame.
/// </returns>
/// <exception cref="ArgumentNullException">thrown when the spectrogram is null.</exception>
public static Tuple<int[], int[]> HistogramOfSpectralPeaks(double[,] spectrogram)
{
    if (spectrogram == null)
    {
        throw new ArgumentNullException(nameof(spectrogram));
    }

    int frames = spectrogram.GetLength(0);
    var peakBinPerFrame = new int[frames];
    var peakCounts = new int[spectrogram.GetLength(1)];

    for (int frame = 0; frame < frames; frame++)
    {
        // Locate the bin holding the maximum of this frame and tally it.
        int peakBin = DataTools.GetMaxIndex(DataTools.GetRow(spectrogram, frame));
        peakCounts[peakBin]++;
        peakBinPerFrame[frame] = peakBin;
    }

    return Tuple.Create(peakCounts, peakBinPerFrame);
}
/// <summary>
/// Calculates the average and standard deviation of the five largest peak values in a decibel array.
/// Non-peak positions, and peaks already selected, count as 0.0 during the selection.
/// </summary>
/// <param name="ae">an acoustic event (not read by this method).</param>
/// <param name="dbArray">The sequence of frame dB over the event.</param>
/// <returns>Item1: average of the selected values. Item2: their standard deviation.</returns>
public static System.Tuple<double, double> KiwiPeakAnalysis(AcousticEvent ae, double[] dbArray)
{
    const int topCount = 5;

    // Keep the dB value at each peak location; everything else is 0.0.
    bool[] isPeak = DataTools.GetPeaks(dbArray);
    var candidates = new double[dbArray.Length];
    for (int frame = 0; frame < dbArray.Length; frame++)
    {
        candidates[frame] = isPeak[frame] ? dbArray[frame] : 0.0;
    }

    // Repeatedly take the current maximum and blank it out so the next largest is found.
    var strongest = new double[topCount];
    int taken = 0;
    while (taken < topCount)
    {
        int best = DataTools.GetMaxIndex(candidates);
        strongest[taken] = candidates[best];
        candidates[best] = 0.0;
        taken++;
    }

    double avPeakDB;
    double sdPeakDB;
    NormalDist.AverageAndSD(strongest, out avPeakDB, out sdPeakDB);
    return System.Tuple.Create(avPeakDB, sdPeakDB);
}
/// <summary>
/// THIS METHOD CALLED FROM ULTIMATELY UP LINE FROM AcousticIndicesCalculate class.
/// For every frame, smooths the spectrum with a moving average of width 3 and records the frequency bin
/// holding the maximum, provided that maximum exceeds the threshold.
/// </summary>
/// <param name="spectrogram">matrix whose rows are frames and whose columns are frequency bins.</param>
/// <param name="threshold">a smoothed maximum must be greater than this value to be recorded.</param>
/// <returns>one element per frame: the bin of the maximum, or 0 where the maximum did not exceed the threshold.</returns>
public static int[] GetSpectralMaxima(double[,] spectrogram, double threshold)
{
    int frameCount = spectrogram.GetLength(0);
    var peakBins = new int[frameCount];

    for (int frame = 0; frame < frameCount; frame++)
    {
        // Smoothing to remove noise before searching for the maximum.
        double[] smoothed = DataTools.filterMovingAverage(DataTools.GetRow(spectrogram, frame), 3);
        int peakBin = DataTools.GetMaxIndex(smoothed);

        // Only record the spectral peak if it is above threshold.
        if (smoothed[peakBin] > threshold)
        {
            peakBins[frame] = peakBin;
        }
    }

    return peakBins;
} // GetSpectralMaxima()
} // GetSpectralMaxima()

/// <summary>
/// THIS METHOD CALLED ONLY FROM THE Frogs.CS class.
/// Returns an array showing which freq bin in each frame has the maximum amplitude.
/// However only returns values for those frames in the neighbourhood of an envelope peak;
/// all other frames keep the default value of zero.
/// </summary>
/// <param name="decibelsPerFrame">per-frame decibel values, passed to DataTools.GetPeakValues to locate envelope peaks.</param>
/// <param name="spectrogram">matrix whose rows are frames and whose columns are frequency bins.</param>
/// <param name="threshold">minimum value applied both to the envelope peak and to the spectral maximum.</param>
/// <param name="nhLimit">number of frames on each side of a peak frame that belong to its neighbourhood.</param>
/// <returns>
/// Item1: one element per frame giving the bin of the spectral maximum.
/// Item2: a matrix the size of the spectrogram with 1.0 at every recorded (frame, bin) location.
/// </returns>
public static Tuple<int[], double[,]> GetSpectralMaxima(double[] decibelsPerFrame, double[,] spectrogram, double threshold, int nhLimit)
{
    int rowCount = spectrogram.GetLength(0);
    int colCount = spectrogram.GetLength(1);

    var peaks = DataTools.GetPeakValues(decibelsPerFrame);

    var maxFreqArray = new int[rowCount]; // one element per frame: the freq bin having max amplitude.
    var hitsMatrix = new double[rowCount, colCount];

    // Start and stop nhLimit frames from the ends so the whole neighbourhood stays inside the matrix.
    for (int r = nhLimit; r < rowCount - nhLimit; r++)
    {
        if (peaks[r] < threshold)
        {
            continue;
        }

        // Scan the symmetric neighbourhood r - nhLimit .. r + nhLimit.
        // The upper bound is inclusive: the previous exclusive bound skipped frame r + nhLimit
        // and produced no output at all when nhLimit was zero.
        for (int nh = -nhLimit; nh <= nhLimit; nh++)
        {
            double[] spectrum = MatrixTools.GetRow(spectrogram, r + nh);
            spectrum[0] = 0.0; // set DC = 0.0 just in case it is max.
            int maxFreqbin = DataTools.GetMaxIndex(spectrum);

            // Only record the spectral peak if it is above threshold.
            if (spectrum[maxFreqbin] > threshold)
            {
                maxFreqArray[r + nh] = maxFreqbin;
                hitsMatrix[r + nh, maxFreqbin] = 1.0;
            }
        }
    }

    return Tuple.Create(maxFreqArray, hitsMatrix);
} // GetSpectralMaxima()
// =============================================================================

/// <summary>
/// Cross-correlates an array with itself and reports the strongest component of the area-normalised result.
/// The first zeroBinCount bins are down-weighted by a divisor that starts at 10 and decreases linearly.
/// </summary>
/// <param name="array">the values to analyse.</param>
/// <param name="zeroBinCount">number of leading bins whose weight is reduced.</param>
/// <returns>Item1: value of the largest bin. Item2: the period, or 0.0 when the largest bin is bin zero.</returns>
public static Tuple<double, double> DetectPeriodicityInArray(double[] array, int zeroBinCount)
{
    var xcorr = DataTools.NormaliseArea(CrossCorr(array, array));

    // In real data the low bins are dominant and hide other frequency content, so decrease their weight.
    double slope = 10 / (double)zeroBinCount;
    for (int bin = 0; bin < zeroBinCount; bin++)
    {
        xcorr[bin] /= 10 - (slope * bin);
    }

    int peakBin = DataTools.GetMaxIndex(xcorr);
    double period = peakBin != 0 ? 2 * array.Length / (double)peakBin : 0.0;
    return Tuple.Create(xcorr[peakBin], period);
}
} //Analysis()

/// <summary>
/// Given the passed feature values (freq and oscRate) calculate p(Data|h[i]) for all hypotheses indexed by i,
/// where each row of the table is one hypothesis, and return the names stored in the best scoring row.
/// </summary>
/// <param name="freq">observed dominant frequency.</param>
/// <param name="oscRate">observed oscillation rate.</param>
/// <param name="frogDataTable">table read for the columns "DominantFreq-Hz", "OscRate-cyclesPerSec", "LatinName" and "CommonName".</param>
/// <returns>two element array: the Latin name followed by the common name of the best scoring row.</returns>
public static string[] ClassifyFrogEvent(double freq, double oscRate, DataTable frogDataTable)
{
    var hypotheses = frogDataTable.Rows;
    var scores = new double[hypotheses.Count];

    // All rows in table = all frog hypotheses.
    for (int h = 0; h < scores.Length; h++)
    {
        DataRow hypothesis = hypotheses[h];
        double[] targets =
        {
            (int)hypothesis["DominantFreq-Hz"],
            (int)hypothesis["OscRate-cyclesPerSec"],
        };
        double[] observed = { freq, oscRate };
        scores[h] = GetNaiveBayesScore(targets, observed);
    }

    DataRow winner = hypotheses[DataTools.GetMaxIndex(scores)];
    return new[] { (string)winner["LatinName"], (string)winner["CommonName"] };
}
/// <summary>
/// Draws the sonogram image with a time track and one score track for each supplied array, then saves it.
/// </summary>
/// <param name="sonogram">the sonogram supplying the image, frame count, duration and frame rate.</param>
/// <param name="path">destination passed to the image Save method.</param>
/// <param name="array1">first score array; its maximum is passed as the upper limit of its track.</param>
/// <param name="array2">second score array; its maximum is passed as the upper limit of its track.</param>
/// <param name="scores">further score arrays, each drawn as its own track.</param>
public static void DrawSonogram(BaseSonogram sonogram, string path, double[] array1, double[] array2, List<double[]> scores)
{
    Log.WriteLine("# Draw image of sonogram.");

    const bool doHighlightSubband = false;
    const bool add1kHzLines = true;

    int frameCount = sonogram.FrameCount;
    int peak1 = DataTools.GetMaxIndex(array1);
    int peak2 = DataTools.GetMaxIndex(array2);

    using (System.Drawing.Image baseImage = sonogram.GetImage(doHighlightSubband, add1kHzLines))
    using (Image_MultiTrack tracks = new Image_MultiTrack(baseImage))
    {
        tracks.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
        tracks.AddTrack(ImageTrack.GetScoreTrack(DataTools.ScaleArray(array1, frameCount), 0.0, array1[peak1], 5));
        tracks.AddTrack(ImageTrack.GetScoreTrack(DataTools.ScaleArray(array2, frameCount), 0.0, array2[peak2], 0.5));

        foreach (double[] score in scores)
        {
            // Use the array maximum as the track ceiling, falling back to 1.0 when it is not positive.
            double ceiling = score[DataTools.GetMaxIndex(score)];
            if (ceiling <= 0.0)
            {
                ceiling = 1.0;
            }

            tracks.AddTrack(ImageTrack.GetScoreTrack(DataTools.ScaleArray(score, frameCount), 0.0, ceiling, 0.1));
        }

        tracks.Save(path);
    }
}
/// <summary>
/// Returns an oscillation array for a single frequency bin.
/// Each column of the matrix is zero-meaned and passed through an FFT; when the power around the spectral
/// maximum is a large enough fraction of the total, that power is accumulated at the index of the maximum.
/// </summary>
/// <param name="xCorrByTimeMatrix">derived from single frequency bin; one auto-correlation vector per column.</param>
/// <param name="sensitivity">a threshold used to ignore low oscillation intensities.</param>
/// <returns>vector of oscillation values after LogTransformOscillationVector.</returns>
public static double[] GetOscillationArrayUsingFft(double[,] xCorrByTimeMatrix, double sensitivity)
{
    int lagCount = xCorrByTimeMatrix.GetLength(0);
    int columnCount = xCorrByTimeMatrix.GetLength(1);

    // Accumulator for the FFT output.
    var accumulatedPower = new double[lagCount / 2];

    for (int column = 0; column < columnCount; column++)
    {
        // Zero mean the auto-correlation vector before doing FFT.
        double[] zeroMeanXcorr = DataTools.DiffFromMean(MatrixTools.GetColumn(xCorrByTimeMatrix, column));

        FFT.WindowFunc window = FFT.Hamming;
        var transform = new FFT(zeroMeanXcorr.Length, window);
        var spectrum = transform.Invoke(zeroMeanXcorr);

        // Skip spectrum[0] because it is DC or zero oscillations/sec.
        spectrum = DataTools.Subarray(spectrum, 1, spectrum.Length - 2);

        // Reduce the power in the low coeff because it can dominate. This is a hack!
        spectrum[0] *= 0.33;

        // Convert to energy and calculate total power in spectrum.
        spectrum = DataTools.SquareValues(spectrum);
        double totalPower = spectrum.Sum();

        // Combined power of the bin holding the maximum plus two further bins chosen by position.
        int peak = DataTools.GetMaxIndex(spectrum);
        double neighbours;
        if (peak == 0)
        {
            neighbours = spectrum[1] + spectrum[2];
        }
        else if (peak >= spectrum.Length - 1)
        {
            neighbours = spectrum[peak - 1] + spectrum[peak];
        }
        else
        {
            neighbours = spectrum[peak - 1] + spectrum[peak + 1];
        }

        double peakPower = spectrum[peak] + neighbours;

        // If the relative power of the max oscillation is large enough, accumulate its power.
        if (peakPower / totalPower > sensitivity)
        {
            accumulatedPower[peak] += peakPower;
        }
    }

    return LogTransformOscillationVector(accumulatedPower, columnCount);
}
/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE sub-band of a spectrogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram.
/// Developed for GenericRecognizer of harmonics.
/// WARNING: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise.
/// But it requires that the frequency of any potential formants is not changing rapidly.
/// This may not be suitable for detecting human speech. However can reduce the frame step.
/// The first two and last two frames are not processed and keep zero values in all three arrays.
/// </summary>
/// <param name="m">spectrogram data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
{
    const int lowerDctBound = 2;
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    // Set up the cosine coefficients.
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // Arrays to store decibels, formant intensity and max index.
    var dBArray = new double[frameCount];
    var intensity = new double[frameCount];
    var maxIndexArray = new int[frameCount];

    for (int t = 2; t < frameCount - 2; t++)
    {
        // Collect the five frames centred on t and average them bin by bin.
        var window = new double[5][];
        for (int k = 0; k < window.Length; k++)
        {
            window[k] = MatrixTools.GetRow(m, t - 2 + k);
        }

        var averaged = new double[binCount];
        for (int bin = 0; bin < binCount; bin++)
        {
            averaged[bin] = (window[0][bin] + window[1][bin] + window[2][bin] + window[3][bin] + window[4][bin]) / 5;
        }

        double loudest = averaged.Max();
        dBArray[t] = loudest;
        if (loudest < dBThreshold)
        {
            continue;
        }

        // xr has twice the length of the frame and is symmetrical. Require only first half.
        // Would normally normalise the xcorr values for overlap count, but for harmonics this introduces
        // too much noise - need to give less weight to the less overlapped values.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(averaged);
        var firstHalf = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            firstHalf[lag] = xr[lag];
        }

        // Now do DCT across the auto cross xr.
        var dct = Oscillations2012.DoDct(firstHalf, cosines, lowerDctBound);
        int peak = DataTools.GetMaxIndex(dct);
        intensity[t] = dct[peak];
        maxIndexArray[t] = peak;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
/// <summary>
/// Runs a test of the ScanArrayForGratingPattern() method.
/// First constructs a grating signal, then embeds it in a longer noise signal.
/// The grating is defined by a period and the number of cycles.
/// The search is repeated for many iterations in order to get an average accuracy.
/// Accuracy depends on relative levels of noise gain and signal gain i.e. the SNR.
/// </summary>
public static void Test_ScanArrayForGridPattern1()
{
    const int n = 500;
    const int maxIterations = 1000;
    const int numberOfCycles = 4;
    const int cyclePeriod = 8; //MUST BE AN EVEN NUMBER!!
    const double bgNoiseGain = 0.1;
    const double signalGain = 0.15;
    const int locationOfSignalStart = 100;

    var noisy = new double[n];
    var rn = new RandomNumber();

    double[] grating = GetPeriodicSignal(cyclePeriod, numberOfCycles);
    DataTools.writeBarGraph(grating);

    int searchStep = 1;
    int errorTolerance = cyclePeriod < searchStep ? searchStep + 1 : cyclePeriod;

    // Run many repeats of the detection to determine its accuracy.
    // Noise in signal means result varies from iteration to iteration.
    int hits = 0;
    double scoreSum = 0.0;
    for (int iter = 0; iter < maxIterations; iter++)
    {
        // Construct background signal.
        for (int i = 0; i < n; i++)
        {
            noisy[i] = rn.GetDouble() * bgNoiseGain;
        }

        // Add in the signal.
        for (int i = 0; i < grating.Length; i++)
        {
            noisy[locationOfSignalStart + i] += grating[i] * signalGain;
        }

        // Detect grating in signal.
        var output = ScanArrayForGratingPattern(noisy, searchStep, numberOfCycles, cyclePeriod);
        int maxLocation = DataTools.GetMaxIndex(output);
        scoreSum += output[maxLocation];

        // Count a hit when the detected location lies strictly within the tolerance of the true start.
        if (Math.Abs(maxLocation - locationOfSignalStart) < errorTolerance)
        {
            hits++;
        }
    }

    LoggedConsole.WriteLine("% correct = {0:f1} Avg score = {1:f4}", 100 * hits / (double)maxIterations, scoreSum / (double)maxIterations);
}
/// <summary>
/// Returns an oscillation array derived from the wavelet packet decomposition of each column of the matrix.
/// When the power around the spectral maximum is a large enough fraction of the total, that power is
/// accumulated at the index of the maximum.
/// </summary>
/// <param name="xCorrByTimeMatrix">one auto-correlation vector per column.</param>
/// <param name="sensitivity">the relative power must exceed this threshold to be accumulated.</param>
/// <param name="binNumber">not read by this method.</param>
/// <returns>vector of oscillation values after LogTransformOscillationVector.</returns>
public static double[] GetOscillationArrayUsingWpd(double[,] xCorrByTimeMatrix, double sensitivity, int binNumber)
{
    int lagCount = xCorrByTimeMatrix.GetLength(0);
    int columnCount = xCorrByTimeMatrix.GetLength(1);
    var accumulatedPower = new double[lagCount / 2];

    for (int column = 0; column < columnCount; column++)
    {
        var zeroMeanXcorr = DataTools.DiffFromMean(MatrixTools.GetColumn(xCorrByTimeMatrix, column));
        var wpd = new WaveletPacketDecomposition(zeroMeanXcorr);
        double[] spectrum = wpd.GetWPDEnergySpectrumWithoutDC();

        // Reduce the power in first coeff because it can dominate - this is a hack!
        spectrum[0] *= 0.5;
        spectrum = DataTools.SquareValues(spectrum);
        double totalPower = spectrum.Sum();

        // Power in the three bins around max; at either edge the max bin itself stands in for the missing neighbour.
        int peak = DataTools.GetMaxIndex(spectrum);
        int below = peak == 0 ? peak : peak - 1;
        int above = peak >= spectrum.Length - 1 ? peak : peak + 1;
        double peakPower = spectrum[peak] + spectrum[below] + spectrum[above];

        // Accumulate only sufficiently dominant oscillations, and check for boundary overrun.
        if (peakPower / totalPower > sensitivity && peak < accumulatedPower.Length)
        {
            accumulatedPower[peak] += peakPower;
        }
    }

    return LogTransformOscillationVector(accumulatedPower, columnCount);
}
/// <summary>
/// Produce a CONFUSION MATRIX and a RANK ORDER MATRIX and write summary accuracy figures to the console.
/// </summary>
/// <param name="arguments">supplies SpeciesCount and InstanceCount.</param>
/// <param name="output">supplies SpeciesID and SimilarityScores; receives ConfusionMatrix and RankOrderMatrix.</param>
public static void CalculateAccuracy(Arguments arguments, Output output)
{
    int maxRank = 10;
    int speciesCount = arguments.SpeciesCount;
    int instanceCount = arguments.InstanceCount;
    output.ConfusionMatrix = new int[speciesCount, speciesCount];
    output.RankOrderMatrix = new int[instanceCount, maxRank];

    // loop through all instances
    for (int r = 0; r < instanceCount; r++)
    {
        // The stored species ID is one greater than the matrix index.
        int correctID = output.SpeciesID[r] - 1;
        double[] instanceScores = MatrixTools.GetRow(output.SimilarityScores, r);
        int maxID = DataTools.GetMaxIndex(instanceScores);
        output.ConfusionMatrix[correctID, maxID]++;

        // calculate rank order matrix.
        if (maxID == correctID)
        {
            output.RankOrderMatrix[r, 0] = 1;

            // Already ranked first: do not search the lower ranks, otherwise the zeroed score of the
            // correct species could be selected again and the instance counted at a second rank.
            continue;
        }

        // Blank out each rejected maximum so that the next largest score is found.
        instanceScores[maxID] = 0.0;
        for (int rank = 1; rank < maxRank; rank++)
        {
            maxID = DataTools.GetMaxIndex(instanceScores);
            if (maxID == correctID)
            {
                output.RankOrderMatrix[r, rank] = 1;
                break;
            }

            instanceScores[maxID] = 0.0;
        }
    } // end for loop r over all instances

    int diagonalSum = 0;
    for (int r = 0; r < speciesCount; r++)
    {
        diagonalSum += output.ConfusionMatrix[r, r];
    }

    LoggedConsole.WriteLine("Diagonal Sum = " + diagonalSum);

    // Divide as double: integer division silently truncated the percentage.
    LoggedConsole.WriteLine("% Accuracy = " + (100 * diagonalSum / (double)instanceCount));

    LoggedConsole.WriteLine("% Rank");
    for (int rank = 0; rank < maxRank; rank++)
    {
        var colSum = MatrixTools.SumColumn(output.RankOrderMatrix, rank);
        double acc = 100 * colSum / (double)instanceCount;
        string str = string.Format("{0} % Acc = {1:f2}", rank, acc);
        LoggedConsole.WriteLine(str);
    }
}
/// <summary>
/// Manual test of CrossCorr(): cross-correlates a smoothed impulse train with an identical smoothed pattern
/// and writes the index of the largest bin, the derived period and its intensity to the console.
/// </summary>
public static void TestCrossCorrelation()
{
    // Length of the test arrays and the spacing of their unit impulses.
    // This method previously declared literal arrays for periods 2, 4, 6, 7, 8, 10 and 16 but only
    // used the period 16 pair; change impulsePeriod to try one of the other spacings.
    const int n = 64;
    const int impulsePeriod = 16;

    // Signal and pattern are separate arrays holding a 1 at every multiple of the period and 0 elsewhere.
    var impulseSignal = new double[n];
    var impulsePattern = new double[n];
    for (int i = 0; i < n; i += impulsePeriod)
    {
        impulseSignal[i] = 1;
        impulsePattern[i] = 1;
    }

    LoggedConsole.WriteLine("Signal length = {0}", n);
    int smoothWindow = 3;
    double[] signal = DataTools.filterMovingAverage(impulseSignal, smoothWindow);
    double[] pattern = DataTools.filterMovingAverage(impulsePattern, smoothWindow);

    var spectrum = CrossCorr(signal, pattern);

    // In real data the low bins are dominant and hide other frequency content.
    // NOTE(review): this loop starts at 1, so bin 0 is left untouched - confirm that is intended.
    int zeroCount = 3;
    for (int s = 1; s < zeroCount; s++)
    {
        spectrum[s] = 0.0;
    }

    spectrum = DataTools.NormaliseArea(spectrum);
    int maxId = DataTools.GetMaxIndex(spectrum);
    double intensityValue = spectrum[maxId];

    if (maxId == 0)
    {
        LoggedConsole.WriteLine("max id = 0");
    }
    else
    {
        double period = 2 * n / (double)maxId;
        LoggedConsole.WriteLine("max id = {0}; period = {1:f2}; intensity = {2:f3}", maxId, period, intensityValue);
    }
}
/// <summary>
/// Returns a matrix with one column for each WPD window of length 2^L where L = levelNumber.
/// The WPD windows do not overlap. Each column holds a single non-zero value: the largest entry of the
/// window's WPD energy spectrum (without DC) divided by the sum of that spectrum.
/// The matrix is also drawn to a hard-coded image path before being returned.
/// </summary>
/// <param name="signal">the signal to be cut into consecutive windows.</param>
/// <param name="levelNumber">L, the power of two giving the window length.</param>
/// <returns>matrix with half-window rows and one column per window.</returns>
public static double[,] GetWPDSpectralSequence(double[] signal, int levelNumber)
{
    int windowWidth = (int)Math.Pow(2, levelNumber);
    int halfWindow = windowWidth / 2;
    int windowCount = signal.Length / windowWidth;
    var wpdByTime = new double[halfWindow, windowCount];

    int start = 0;
    for (int w = 0; w < windowCount; w++, start += windowWidth)
    {
        double[] segment = DataTools.Subarray(signal, start, windowWidth);
        var wpd = new WaveletPacketDecomposition(segment);
        double[] energy = wpd.GetWPDEnergySpectrumWithoutDC();

        // There should only be one dominant oscillation in any one freq band at one time.
        // Keep only the maximum value but divide by the total energy in the spectrum.
        // Energy dispersed through the spectrum is indicative of a single impulse, not an oscillation.
        int peak = DataTools.GetMaxIndex(energy);
        var column = new double[halfWindow];
        column[peak] = energy[peak] / energy.Sum();
        MatrixTools.SetColumn(wpdByTime, w, column);
    }

    string path = @"C:\SensorNetworks\Output\Sonograms\testwavelet.png";
    ImageTools.DrawReversedMatrix(wpdByTime, path);

    return wpdByTime;
}
/// <summary>
/// Steps through two arrays in parallel, cross-correlating equal length samples taken from each,
/// and lays down the strongest intensity and its period over the frames covered by each sample.
/// Samples whose period falls outside the permitted range are ignored.
/// </summary>
/// <param name="array1">first array; its length sets the length of the returned arrays.</param>
/// <param name="array2">second array.</param>
/// <param name="step">distance between the starts of consecutive samples.</param>
/// <param name="sampleLength">length of the sample taken from each array.</param>
/// <param name="minPeriod">smallest accepted period, in frames.</param>
/// <param name="maxPeriod">largest accepted period, in frames.</param>
/// <returns>Item1: intensity per frame. Item2: period per frame.</returns>
public static Tuple<double[], double[]> DetectXcorrelationInTwoArrays(double[] array1, double[] array2, int step, int sampleLength, double minPeriod, double maxPeriod)
{
    const int zeroCount = 3;
    int length = array1.Length;
    int stepCount = length / step;
    var intensity = new double[length];
    var periodicity = new double[length];

    for (int i = 0, start = 0; i < stepCount; i++, start += step)
    {
        double[] lower = DataTools.Subarray(array1, start, sampleLength);
        double[] upper = DataTools.Subarray(array2, start, sampleLength);

        // Stop as soon as a full length sample cannot be obtained from both arrays.
        if (lower.Length != sampleLength || upper.Length != sampleLength)
        {
            break;
        }

        var spectrum = CrossCorr(lower, upper);

        // In real data these bins are dominant and hide other frequency content.
        for (int bin = 0; bin < zeroCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);

        // Convert the peak bin to a period in frames.
        double period = 2 * sampleLength / (double)peakBin;
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // Lay down the score over the sample length, keeping the strongest value seen for each frame.
        double peakValue = spectrum[peakBin];
        for (int frame = start; frame < start + sampleLength; frame++)
        {
            if (intensity[frame] < peakValue)
            {
                intensity[frame] = peakValue;
                periodicity[frame] = period;
            }
        }
    }

    return Tuple.Create(intensity, periodicity);
} // DetectXcorrelationInTwoArrays()
/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE sub-band of a sonogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
/// Developed for GenericRecognizer of harmonics.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>three arrays: the maximum of each frame, the formant intensity and the index of the largest DCT coefficient.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
    const int lowerDctBound = 2;
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    var dBArray = new double[frameCount];
    var intensity = new double[frameCount];   // formant intensity per frame
    var maxIndexArray = new int[frameCount];  // index of the largest DCT coefficient per frame

    // Set up the cosine coefficients.
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    for (int t = 0; t < frameCount; t++)
    {
        double[] frame = MatrixTools.GetRow(m, t);
        double loudest = frame.Max();
        dBArray[t] = loudest;
        if (loudest < dBThreshold)
        {
            continue;
        }

        // xr has twice the length of the frame and is symmetrical. Require only first half.
        // Also need to normalise the values for overlap count.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(frame);
        var normalised = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            normalised[lag] = xr[lag] / (binCount - lag);
        }

        // Now do DCT across the auto cross xr.
        var dct = Oscillations2012.DoDct(normalised, cosines, lowerDctBound);
        int peak = DataTools.GetMaxIndex(dct);
        intensity[t] = dct[peak];
        maxIndexArray[t] = peak;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
} //DetectBarsInTheRowsOfaMatrix()

/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
/// Was first developed for crow calls.
/// Frames whose smoothed average decibel value reaches the threshold are cross-correlated with themselves
/// and the strongest component of the result gives the intensity and period for that frame.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <param name="callSpan">Minimum length of call of interest (not read by this method).</param>
/// <returns>a tuple: smoothed row averages, intensity per frame, period per frame.</returns>
public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
{
    // Number of leading bins zeroed to remove low freq content which dominates the spectrum.
    const int zeroBinCount = 3;

    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);
    var intensity = new double[frameCount];
    var periodicity = new double[frameCount];

    double[] dBArray = DataTools.filterMovingAverage(MatrixTools.GetRowAverages(m), 3);

    for (int t = 0; t < frameCount; t++)
    {
        if (dBArray[t] < dBThreshold)
        {
            continue;
        }

        var frame = MatrixTools.GetRow(m, t);
        var spectrum = AutoAndCrossCorrelation.CrossCorr(frame, frame);

        // In real data these bins are dominant and hide other frequency content.
        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);
        intensity[t] = spectrum[peakBin];
        periodicity[t] = peakBin != 0 ? 2 * binCount / (double)peakBin : 0.0;
    }

    return Tuple.Create(dBArray, intensity, periodicity);
}
/// <summary>
/// Cross-correlates each row of the matrix with the row before it, after subtracting each row's mean,
/// and records the strongest component of the result and its period. Row zero keeps zero values.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="threshold">not read by this method.</param>
/// <param name="zeroBinCount">number of leading bins set to zero before the maximum is located.</param>
/// <returns>Item1: intensity per row. Item2: period per row.</returns>
public static Tuple<double[], double[]> DetectBarsInTheRowsOfaMatrix(double[,] m, double threshold, int zeroBinCount)
{
    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    var intensity = new double[rowCount];
    var periodicity = new double[rowCount];

    double[] previous = DataTools.DiffFromMean(MatrixTools.GetRow(m, 0));

    for (int r = 1; r < rowCount; r++)
    {
        double[] current = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
        var spectrum = AutoAndCrossCorrelation.CrossCorr(previous, current);

        // In real data these bins are dominant and hide other frequency content.
        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);
        intensity[r] = spectrum[peakBin];
        periodicity[r] = peakBin != 0 ? 2 * colCount / (double)peakBin : 0.0;

        previous = current;
    }

    return Tuple.Create(intensity, periodicity);
} //DetectBarsInTheRowsOfaMatrix()
/// <summary>
/// Reduces each row of a matrix by taking the maximum within fixed column bands.
/// The bands are delimited by the columns 8, 23, 53, 113 and 233, which gives at most four bands;
/// a reducedColCount greater than four indexes past the end of that table.
/// </summary>
/// <param name="matrix">the matrix to reduce.</param>
/// <param name="reducedColCount">number of bands, and therefore columns, in the returned matrix.</param>
/// <returns>matrix with the same number of rows and reducedColCount columns.</returns>
public static double[,] MaxPoolMatrixColumns(double[,] matrix, int reducedColCount)
{
    // Band edges, ie reduce the 256 vector to 4 values.
    int[] bandEdges = { 8, 23, 53, 113, 233 };

    int rowCount = matrix.GetLength(0);
    var pooled = new double[rowCount, reducedColCount];

    for (int r = 0; r < rowCount; r++)
    {
        var rowVector = MatrixTools.GetRow(matrix, r);
        for (int band = 0; band < reducedColCount; band++)
        {
            int bandWidth = bandEdges[band + 1] - bandEdges[band];
            double[] bandValues = DataTools.Subarray(rowVector, bandEdges[band], bandWidth);
            pooled[r, band] = bandValues[DataTools.GetMaxIndex(bandValues)];
        }
    }

    return pooled;
}
/// <summary>
/// Steps through a long array, cross-correlating each segment with itself, and records the strongest
/// component of the area-normalised result and its period for every step.
/// The first zeroBinCount bins are down-weighted by a divisor that starts at 10 and decreases linearly.
/// </summary>
/// <param name="array">the values to analyse.</param>
/// <param name="step">distance between the starts of consecutive segments.</param>
/// <param name="segmentLength">length of each segment.</param>
/// <param name="zeroBinCount">number of leading bins whose weight is reduced.</param>
/// <returns>Item1: intensity per step. Item2: period per step, 0.0 where the largest bin is bin zero.</returns>
public static Tuple<double[], double[]> DetectPeriodicityInLongArray(double[] array, int step, int segmentLength, int zeroBinCount)
{
    int stepCount = array.Length / step;
    var intensity = new double[stepCount];
    var periodicity = new double[stepCount];

    // Rate at which the divisor applied to the low bins falls from 10.
    double slope = 10 / (double)zeroBinCount;

    for (int i = 0; i < stepCount; i++)
    {
        double[] segment = DataTools.Subarray(array, i * step, segmentLength);
        var spectrum = DataTools.NormaliseArea(CrossCorr(segment, segment));

        // In real data these bins are dominant and hide other frequency content.
        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] /= 10 - (slope * bin);
        }

        int peakBin = DataTools.GetMaxIndex(spectrum);
        intensity[i] = spectrum[peakBin];
        periodicity[i] = peakBin != 0 ? 2 * segmentLength / (double)peakBin : 0.0;
    }

    return Tuple.Create(intensity, periodicity);
}
/// <summary>
/// Reduces the number of columns of a matrix by an integer factor, replacing each run of
/// <paramref name="factor"/> consecutive columns in a row with the maximum value in that run.
/// </summary>
/// <param name="matrix">The matrix whose rows are to be pooled.</param>
/// <param name="factor">Number of adjacent columns pooled into one output column.</param>
/// <returns>
/// A matrix with the same number of rows and (columns / factor) columns, using integer division;
/// any columns left over at the end of a row are ignored.
/// </returns>
public static double[,] MaxPoolMatrixColumnsByFactor(double[,] matrix, int factor)
{
    int rowCount = matrix.GetLength(0);
    int pooledColCount = matrix.GetLength(1) / factor;
    var pooled = new double[rowCount, pooledColCount];

    for (int row = 0; row < rowCount; row++)
    {
        var rowValues = MatrixTools.GetRow(matrix, row);

        for (int col = 0; col < pooledColCount; col++)
        {
            // window number col starts at column col * factor
            double[] window = DataTools.Subarray(rowValues, col * factor, factor);
            pooled[row, col] = window[DataTools.GetMaxIndex(window)];
        }
    }

    return pooled;
}
/// <summary>
/// The CORE ANALYSIS METHOD.
/// Builds a spectrogram of the audio segment, scores each frame for harmonic content within a
/// 64-bin frequency band that starts at the configured minimum frequency, and converts the scoring frames into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment to be analysed.</param>
/// <param name="configDict">
/// Analysis parameters. The keys MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, DECIBEL_THRESHOLD, INTENSITY_THRESHOLD
/// and CALL_DURATION are required; the frame length key is optional and defaults to 1024.
/// </param>
/// <param name="segmentStartOffset">Passed to each acoustic event that is created.</param>
/// <returns>
/// The spectrogram, a hits matrix the same size as the spectrogram, a plot of the intensity array,
/// the list of predicted events and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Frame length defaults to 1024 unless the config supplies a value.
    int frameLength = configDict.TryGetValue(AnalysisKeys.FrameLength, out string frameLengthText)
        ? int.Parse(frameLengthText)
        : 1024;

    const double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"]);
    int minFormantGap = int.Parse(configDict["MIN_FORMANT_GAP"]);
    int maxFormantGap = int.Parse(configDict["MAX_FORMANT_GAP"]);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"]); // dB
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); // in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"]); // seconds

    var recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan recordingDuration = recording.Duration;
    int sampleRate = recording.SampleRate;
    double freqBinWidth = sampleRate / (double)sonoConfig.WindowSize;

    // The frame rate is set equal to the bin width here; the window overlap above is zero.
    double framesPerSecond = freqBinWidth;

    // The Xcorrelation-FFT technique requires the number of bins scanned to be a power of 2.
    // Assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level, i.e. 500 to 1600.
    // Assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level, i.e. 500 to 2700.
    const int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxBin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int frameCount = sonogram.Data.GetLength(0);
    int binCount = sonogram.Data.GetLength(1);
    double[,] band = MatrixTools.Submatrix(sonogram.Data, 0, minBin, frameCount - 1, maxBin);

    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    // ii: DETECT HARMONICS
    var harmonics = CrossCorrelation.DetectHarmonicsInSonogramMatrix(band, decibelThreshold, callSpan);
    double[] intensity = harmonics.Item2; // an array of periodicity scores
    double[] periodicity = harmonics.Item3;

    int noiseBound = (int)(100 / freqBinWidth); // ignore 0-100 Hz - too much noise
    var scores = new double[intensity.Length];

    for (int frame = 0; frame < frameCount; frame++)
    {
        double frameIntensity = intensity[frame];
        if (frameIntensity < intensityThreshold)
        {
            continue;
        }

        // ignore locations with incorrect formant gap
        double formantGapHz = periodicity[frame] * freqBinWidth;
        if (formantGapHz < minFormantGap || formantGapHz > maxFormantGap)
        {
            continue;
        }

        // Find the frequency having maximum power, ignoring the noisy low bins.
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, frame);
        for (int bin = 0; bin < noiseBound; bin++)
        {
            spectrum[bin] = 0.0;
        }

        int dominantBin = DataTools.GetMaxIndex(spectrum);
        int dominantHz = (int)Math.Round(dominantBin * freqBinWidth);

        // A dominant frequency below 1200 Hz zeroes the score
        // (the original comment attributes such low maxima to human voices).
        double discount = dominantHz < 1200 ? 0.0 : 1.0;

        if (frameIntensity > intensityThreshold)
        {
            scores[frame] = frameIntensity * discount;
        }
    }

    // Transfer info to a hits matrix, using a reduced threshold for display of hits.
    var hits = new double[frameCount, binCount];
    double displayThreshold = intensityThreshold * 0.75;

    for (int frame = 0; frame < frameCount; frame++)
    {
        if (scores[frame] < displayThreshold)
        {
            continue;
        }

        // the same relative period is written to every bin of the analysed band
        double formantGapHz = periodicity[frame] * freqBinWidth;
        double relativePeriod = (formantGapHz - minFormantGap) / maxFormantGap;
        for (int bin = minBin; bin < maxBin; bin++)
        {
            hits[frame, bin] = relativePeriod;
        }
    }

    // iii: CONVERT TO ACOUSTIC EVENTS
    const double maxPossibleScore = 0.5;
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List<AcousticEvent>();

    for (int frame = 0; frame < frameCount; frame++)
    {
        // assume one score position per crow call
        if (scores[frame] < 0.001)
        {
            continue;
        }

        // the event is centred on the scoring frame, so it starts half a call span earlier
        double startTime = (frame - halfCallSpan) / framesPerSecond;
        var ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        ev.Score = scores[frame];
        ev.ScoreNormalised = ev.Score / maxPossibleScore;
        predictedEvents.Add(ev);
    }

    Plot plot = new Plot("CROW", intensity, intensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, recordingDuration);
}
/// <summary>
/// ################ THE KEY ANALYSIS METHOD.
/// Builds a spectrogram with a fixed frame size of 128 and window overlap of 0.5, extracts the time series of two
/// frequency bins (the bin just above 1500 Hz and the bin four above that), cross-correlates short windows of
/// the two series, and converts windows whose period lies within the configured range into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment to be analysed.</param>
/// <param name="configDict">
/// Analysis parameters. The keys INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION, MIN_PERIOD and MAX_PERIOD are required.
/// Frame size and window overlap are fixed in this method and are not read from the dictionary.
/// </param>
/// <param name="segmentStartOffset">Passed on to the event conversion and event cropping steps.</param>
/// <returns>
/// The spectrogram, an all-zero hits matrix the same size as the spectrogram, a list containing the intensity plot,
/// the list of predicted events and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // set default values - ignore those set by user
    int frameSize = 128;
    double windowOverlap = 0.5;

    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); // in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"]); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"]); // seconds
    double minPeriod = double.Parse(configDict["MIN_PERIOD"]); // seconds
    double maxPeriod = double.Parse(configDict["MAX_PERIOD"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    // i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 256      17640       14.5ms          68.9        68.9    ms              hz              hz
    // 512      17640       29.0ms          34.4        34.4    ms              hz              hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms         8.6         8.6     7430ms          551hz           1100hz
    // The Xcorrelation-FFT technique requires the number of bins to scan to be a power of 2.
    // Assuming sr=17640 and window=256, then binWidth = 68.9Hz and 1500Hz = bin 21.7.
    // The two bins actually correlated below are lowerBin and lowerBin + 4.
    int midHz = 1500;
    int lowerBin = (int)(midHz / freqBinWidth) + 1; // because bin[0] = DC
    int upperBin = lowerBin + 4;
    int lowerHz = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
    int upperHz = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

    double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
    double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);

    // ABSOLUTE NORMALISATION 0-60 dB
    lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60);
    upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60);

    // Step between the starts of successive analysis windows: one fortieth of a second, expressed in frames.
    // Clamp to at least one frame. Without the clamp a frame rate below 40 frames per second truncates the
    // step to zero and the division on the next line throws DivideByZeroException.
    int step = Math.Max(1, (int)(framesPerSecond / 40));
    int stepCount = rowCount / step;

    // number of frames cross-correlated at each step
    int sampleLength = 32;
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    // ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // Stop at the first window that cannot be filled, i.e. when the end of the data is reached.
        if (lowerSubarray == null || upperSubarray == null)
        {
            break;
        }

        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);

        // in real data these bins are dominant and hide other frequency content
        int zeroCount = 2;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0;
        }

        int maxId = DataTools.GetMaxIndex(spectrum);

        // Convert maxId to a period in seconds. A maxId of zero yields an infinite period,
        // which is rejected by the range test below.
        double period = 2 * sampleLength / (double)maxId / framesPerSecond;
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // Lay down the score over the whole window, keeping the larger intensity where windows overlap.
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    // iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 3);

    // ABSOLUTE NORMALISATION 0-0.5
    intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5);

    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerHz,
        upperHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    CropEvents(predictedEvents, upperArray, segmentStartOffset);

    // The hits matrix is returned unpopulated.
    var hits = new double[rowCount, colCount];

    var plots = new List<Plot>();
    plots.Add(new Plot("intensity", intensity, intensityThreshold));

    return Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration);
}
/// <summary>
/// Calculate summary statistics for supplied temporal and spectral targets.
/// </summary>
/// <remarks>
/// The acoustic statistics calculated in this method are based on methods outlined in
/// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
/// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
/// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
/// ..
/// The Briggs features are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
/// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count.
///    We use entropy instead because its value is not dependent on row or column count because it is normalized.
/// For the following measures of k-central moments, the freq and time values are normalized in 0,1 to width of the event.
/// 2. freq-mean
/// 3. freq-variance
/// 4. freq-skew and kurtosis
/// 5. time-mean
/// 6. time-variance
/// 7. time-skew and kurtosis
/// 8. freq-max (normalized)
/// 9. time-max (normalized)
/// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
/// ...
/// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels
/// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
/// NOTE 2: To normalise for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
///         There are advantages and disadvantages to Briggs method versus ours. In our case, we have to convert decibel values back to
///         energy values when calculating the statistics for the extracted acoustic event.
/// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
///         Only mean and standard deviation.
/// ..
/// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
/// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
/// BUT not so long as to cause memory constipation.
/// </remarks>
/// <param name="recording">as type AudioRecording which contains the event</param>
/// <param name="temporalTarget">
/// Both start and end bounds. These are shifted by minus <paramref name="segmentStartOffset"/> to obtain
/// bounds relative to the start of the supplied recording.
/// </param>
/// <param name="spectralTarget">both bottom and top bounds in Hertz</param>
/// <param name="config">parameters that determine the outcome of the analysis</param>
/// <param name="segmentStartOffset">How long since the start of the recording this event occurred</param>
/// <returns>
/// An instance of EventStatistics. Its Error flag is set, with an explanatory ErrorMessage, when the shifted
/// target does not fit inside the recording or when the target covers no complete frame or frequency bin.
/// </returns>
public static EventStatistics AnalyzeAudioEvent(
    AudioRecording recording,
    Range<TimeSpan> temporalTarget,
    Range<double> spectralTarget,
    EventStatisticsConfiguration config,
    TimeSpan segmentStartOffset)
{
    var stats = new EventStatistics
    {
        EventStartSeconds = temporalTarget.Minimum.TotalSeconds,
        EventEndSeconds = temporalTarget.Maximum.TotalSeconds,
        LowFrequencyHertz = spectralTarget.Minimum,
        HighFrequencyHertz = spectralTarget.Maximum,
        SegmentDurationSeconds = recording.Duration.TotalSeconds,
        SegmentStartSeconds = segmentStartOffset.TotalSeconds,
    };

    // shift the temporal target so that its coordinates are relative to the supplied audio segment
    var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

    if (!recording
        .Duration
        .AsRangeFromZero(Topology.Inclusive)
        .Contains(localTemporalTarget))
    {
        stats.Error = true;
        stats.ErrorMessage =
            $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";

        return stats;
    }

    // convert recording to spectrogram
    int sampleRate = recording.SampleRate;
    double epsilon = recording.Epsilon;

    // extract the spectrogram
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

    double hertzBinWidth = dspOutput1.FreqBinWidth;
    var stepDurationInSeconds = config.FrameStep / (double)sampleRate;
    var startFrame = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

    // subtract 1 frame because want to end before start of end point.
    var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

    var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
    var topBin = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

    // Events can have their high value set to the nyquist.
    // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in
    // these cases. So we just ask for the bin below.
    if (topBin >= config.FrameSize / 2)
    {
        topBin = (config.FrameSize / 2) - 1;
    }

    // A target shorter than one frame step leaves no whole frame between its bounds (endFrame < startFrame),
    // and a target at or above the nyquist leaves no bin after the clamp above (topBin < bottomBin).
    // Either would pass an inverted or empty range to the submatrix call below, so report it as an error
    // in the same way as the duration check above.
    if (endFrame < startFrame || topBin < bottomBin)
    {
        stats.Error = true;
        stats.ErrorMessage =
            $"Target ({localTemporalTarget}) is too small to analyze: it spans frames {startFrame} to {endFrame} and frequency bins {bottomBin} to {topBin}";

        return stats;
    }

    // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // extract the required acoustic event
    var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

    // Get the SNR of the event. This is just the max value in the matrix because noise reduced
    MatrixTools.MinMax(eventMatrix, out _, out double max);
    stats.SnrDecibels = max;

    // Now need to convert event matrix back to energy values before calculating other statistics
    eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

    var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
    var rowAverages = MatrixTools.GetRowAverages(eventMatrix);

    // calculate the mean and temporal standard deviation in decibels
    NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
    stats.MeanDecibels = 10 * Math.Log10(mean);
    stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate the frequency standard deviation in decibels
    NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
    stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate relative location of the temporal maximum
    int maxRowId = DataTools.GetMaxIndex(rowAverages);
    stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

    // calculate the entropy dispersion/concentration indices
    stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
    stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

    // calculate the spectral centroid and the dominant frequency
    // (bin indices are relative to the event, so the event's minimum frequency is added back)
    double binCentroid = CalculateSpectralCentroid(columnAverages);
    stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
    int maxColumnId = DataTools.GetMaxIndex(columnAverages);
    stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

    // remainder of this method is to produce debugging images. Can comment out when not debugging.
    /*
     * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
     * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
     * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
     * image4.Save(path4);
     * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
     * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
     * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
     * image4.Save(path4);
     */
    return stats;
}
/// <summary>
/// THIS METHOD NO LONGER IN USE.
/// NOT USEFUL FOR ANIMAL CALLS.
/// Tried this but it is suitable only when there is guarantee of numerous spectral tracks as in the vowels of human speech.
/// It yields SPURIOUS RESULTS where there is only one whistle track.
/// </summary>
/// <remarks>
/// For every second row of the matrix, the values in bins minBin..maxBin are summed over five consecutive rows,
/// mean-subtracted and passed through a DCT. When the largest DCT coefficient lies in the index range implied by
/// the min/max period and exceeds the threshold, the corresponding period is written into the hits matrix for
/// that row and the following row.
/// </remarks>
/// <param name="matrix">The matrix to search.</param>
/// <param name="minBin">First column included in the DCT.</param>
/// <param name="maxBin">Last column included in the DCT.</param>
/// <param name="hzWidth">Scaling value used to convert between DCT index and period.</param>
/// <param name="normaliseDCT">When true, the DCT coefficients are normalised to unit length before the peak search.</param>
/// <param name="minPeriod">Smallest acceptable period; determines the largest acceptable DCT index.</param>
/// <param name="maxPeriod">Largest acceptable period; determines the smallest acceptable DCT index.</param>
/// <param name="dctThreshold">The peak DCT coefficient must exceed this value for a hit to be recorded.</param>
/// <returns>A matrix the same size as <paramref name="matrix"/> holding the detected period at hit locations and zero elsewhere.</returns>
public static double[,] DetectHarmonicsUsingDCT(double[,] matrix, int minBin, int maxBin, int hzWidth, bool normaliseDCT, int minPeriod, int maxPeriod, double dctThreshold)
{
    // the DCT spans N freq bins
    int dctLength = maxBin - minBin + 1;

    // Period and DCT index are related by a factor of 2 because index = Pi and not 2Pi.
    int lowestIndex = (int)(hzWidth / (double)maxPeriod * 2);
    int highestIndex = Math.Min((int)(hzWidth / (double)minPeriod * 2), dctLength); // safety check in case of future changes to code

    int rowCount = matrix.GetLength(0);
    int colCount = matrix.GetLength(1);
    var hits = new double[rowCount, colCount];

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    // only every second row is analysed; a hit is copied to the skipped row
    for (int row = 0; row < rowCount - dctLength; row += 2)
    {
        // accumulate five rows of values for each bin
        var accumulated = new double[dctLength];
        for (int bin = 0; bin < dctLength; bin++)
        {
            double sum = 0.0;
            for (int offset = 0; offset < 5; offset++)
            {
                sum += matrix[row + offset, minBin + bin];
            }

            accumulated[bin] = sum;
        }

        double[] coefficients = MFCCStuff.DCT(DataTools.SubtractMean(accumulated), cosines);

        // convert to absolute values
        for (int k = 0; k < dctLength; k++)
        {
            coefficients[k] = Math.Abs(coefficients[k]);
        }

        // remove low freq values from consideration
        for (int k = 0; k < 5; k++)
        {
            coefficients[k] = 0.0;
        }

        if (normaliseDCT)
        {
            coefficients = DataTools.normalise2UnitLength(coefficients);
        }

        int peakIndex = DataTools.GetMaxIndex(coefficients);
        double period = hzWidth / (double)peakIndex * 2; // times 2 because index = Pi and not 2Pi

        // mark DCT location with harmonic freq, only if harmonic freq is in correct range and amplitude
        bool indexInRange = peakIndex >= lowestIndex && peakIndex <= highestIndex;
        if (indexInRange && coefficients[peakIndex] > dctThreshold)
        {
            for (int bin = 0; bin < dctLength; bin++)
            {
                hits[row, minBin + bin] = period;
                hits[row + 1, minBin + bin] = period; // alternate row
            }
        }
    }

    return hits;
}
/// <summary>
/// Scores a sub-band spectrum by combining the sharpness and height of its largest peaks with a comparison
/// of peak heights at a set of expected peak positions.
/// </summary>
/// <remarks>
/// A correlation of the spectrum against two template spectra is also calculated, but that
/// correlation does not contribute to any of the returned scores.
/// Earlier, commented-out experiments (area-ratio tests and templates for pre-filtered data) are omitted here.
/// </remarks>
/// <param name="subBandSpectrum">The spectrum to be scored.</param>
/// <param name="windowWidth">The window size; the values 1024 and 2048 select different neighbourhood sizes.</param>
/// <returns>
/// Three values: [0] the combined score (0.4 * relative peak score + 0.6 * peaks score),
/// [1] the relative peak score and [2] the peaks score.
/// </returns>
public static double[] CalculateScores(double[] subBandSpectrum, int windowWidth)
{
    // Template spectra for scoring when the window size is 2048 (not currently used):
    //double[] truePositives1 = { 0.0014, 0.0012, 0.0009, 0.0003, 0.0001, 0.0005, 0.0008, 0.0029, 0.0057, 0.0070, 0.0069, 0.0063, 0.0053, 0.0032, 0.0013, 0.0011, 0.0011, 0.0007, 0.0000, 0.0006, 0.0010, 0.0013, 0.0008, 0.0009, 0.0022, 0.0046, 0.0069, 0.0082, 0.0070, 0.0065, 0.0082, 0.0078, 0.0052, 0.0021, 0.0132, 0.0357, 0.0420, 0.0996, 0.2724, 0.4557, 0.5739, 0.6366, 0.6155, 0.4598, 0.2334, 0.1468, 0.1410, 0.1759, 0.2157, 0.1988, 0.2131, 0.3072, 0.6161, 0.8864, 1.0000, 0.9290, 0.6983, 0.4208, 0.2690, 0.3190, 0.3109, 0.2605, 0.1896, 0.2118, 0.5961, 0.8298, 0.9290, 0.7363, 0.6605, 0.5840, 0.3576, 0.1019, 0.0162, 0.0400, 0.0405, 0.1106, 0.1803, 0.2083, 0.2058, 0.1475, 0.1387, 0.0870, 0.0804, 0.0975, 0.0848, 0.0490, 0.0193, 0.0217, 0.0210, 0.0214, 0.0253, 0.0254, 0.0072 };
    //double[] truePositives2 = { 0.0090, 0.0106, 0.0138, 0.0134, 0.0088, 0.0026, 0.0002, 0.0002, 0.0003, 0.0000, 0.0001, 0.0006, 0.0013, 0.0019, 0.0020, 0.0015, 0.0008, 0.0004, 0.0002, 0.0015, 0.0022, 0.0073, 0.0195, 0.0628, 0.2203, 0.4031, 0.5635, 0.5445, 0.4828, 0.2869, 0.1498, 0.0588, 0.0500, 0.0542, 0.0641, 0.1188, 0.1833, 0.1841, 0.2684, 0.3062, 0.2831, 0.1643, 0.0606, 0.0336, 0.0136, 0.0056, 0.0187, 0.0301, 0.0700, 0.1103, 0.1559, 0.2449, 0.5303, 0.8544, 1.0000, 0.8361, 0.6702, 0.4839, 0.3463, 0.1525, 0.1049, 0.1201, 0.1242, 0.2056, 0.1653, 0.2685, 0.2947, 0.5729, 0.7024, 0.6916, 0.4765, 0.2488, 0.1283, 0.0543, 0.0326, 0.0236, 0.0187, 0.0108, 0.0021, 0.0028, 0.0019, 0.0024, 0.0041, 0.0063, 0.0066, 0.0055, 0.0036, 0.0025, 0.0018, 0.0014, 0.0013, 0.0008, 0.0010 };

    // Template spectra used for scoring when the window size is 1024.
    double[] truePositives1 =
    {
        0.0007, 0.0004, 0.0000, 0.0025, 0.0059, 0.0069, 0.0044, 0.0012, 0.0001, 0.0006, 0.0013, 0.0032,
        0.0063, 0.0067, 0.0070, 0.0033, 0.0086, 0.0128, 0.1546, 0.4550, 0.6197, 0.4904, 0.2075, 0.0714,
        0.1171, 0.4654, 0.8634, 1.0000, 0.7099, 0.2960, 0.1335, 0.3526, 0.6966, 0.9215, 0.6628, 0.3047,
        0.0543, 0.0602, 0.0931, 0.1364, 0.1314, 0.1047, 0.0605, 0.0204, 0.0128, 0.0114,
    };
    double[] truePositives2 =
    {
        0.0126, 0.0087, 0.0043, 0.0002, 0.0000, 0.0010, 0.0018, 0.0016, 0.0005, 0.0002, 0.0050, 0.1262,
        0.4054, 0.5111, 0.3937, 0.1196, 0.0156, 0.0136, 0.0840, 0.1598, 0.1691, 0.0967, 0.0171, 0.0152,
        0.0234, 0.3648, 0.8243, 1.0000, 0.6727, 0.2155, 0.0336, 0.0240, 0.2661, 0.6240, 0.7523, 0.5098,
        0.1493, 0.0149, 0.0046, 0.0020, 0.0037, 0.0061, 0.0061, 0.0036, 0.0010, 0.0008,
    };

    // TEST TWO: correlation with the template spectra. The better of the two correlations is kept,
    // but it is not used in the scores returned below.
    var spectrumZScores = NormalDist.Convert2ZScores(subBandSpectrum);
    double templateScore1 = AutoAndCrossCorrelation.CorrelationCoefficient(spectrumZScores, truePositives1);
    double templateScore2 = AutoAndCrossCorrelation.CorrelationCoefficient(spectrumZScores, truePositives2);
    double correlationScore = templateScore2 > templateScore1 ? templateScore2 : templateScore1;

    // TEST THREE: sharpness and height of peaks - score the four highest peaks.
    bool isWideWindow = windowWidth == 2048;
    int sideOffset = isWideWindow ? 6 : 3; // distance from the peak at which the "sides" are sampled
    int clearSpan = isWideWindow ? 9 : 3; // half-width of the neighbourhood zeroed around a scored peak

    // peaks are searched for in a working copy so that scored peaks can be zeroed out
    var remaining = (double[])subBandSpectrum.Clone();

    // set spectrum bounds
    int lowestPeak = subBandSpectrum.Length / 4;
    int highestPeak = subBandSpectrum.Length * 7 / 8;

    double peaksScore = 0;
    for (int attempt = 0; attempt < 4; attempt++)
    {
        int peak = DataTools.GetMaxIndex(remaining);

        // Peak location cannot be too low or too high.
        // NOTE(review): a rejected peak is not zeroed, so it looks as though the same peak would be found
        // (and rejected) again on every remaining attempt - confirm this is intended.
        if (peak < lowestPeak || peak > highestPeak)
        {
            continue;
        }

        double sides = (subBandSpectrum[peak - sideOffset] + subBandSpectrum[peak + sideOffset]) / 2;
        peaksScore += remaining[peak] - sides;

        // now zero peak and peak neighbourhood
        for (int n = 0; n < clearSpan; n++)
        {
            remaining[peak + n] = 0;
            remaining[peak - n] = 0;
        }
    }

    // take average of four peaks
    peaksScore /= 4;

    // TEST FOUR: peak position ratios.
    // An earlier set of centres was { 3, 10, 37, 44, 54, 67 }.
    int[] peakLocationCentres = { 2, 5, 19, 22, 27, 33 };
    int flankOffset = windowWidth == 1024 ? 3 : 6;
    var relativePeakHeights = new double[peakLocationCentres.Length];

    for (int p = 0; p < peakLocationCentres.Length; p++)
    {
        int centre = peakLocationCentres[p];

        // find the largest value within four bins below and three bins above the expected centre
        double best = -double.MaxValue;
        int bestId = centre;
        for (int id = Math.Max(0, centre - 4); id < centre + 4; id++)
        {
            if (subBandSpectrum[id] > best)
            {
                best = subBandSpectrum[id];
                bestId = id;
            }
        }

        // peak height relative to the values on both flanks
        int lowerFlank = Math.Max(0, bestId - flankOffset);
        relativePeakHeights[p] = subBandSpectrum[bestId] - subBandSpectrum[lowerFlank] - subBandSpectrum[bestId + flankOffset];
    }

    double[] targetHeights = { 0.1, 0.1, 0.5, 0.5, 1.0, 0.6 };
    double relativePeakScore = AutoAndCrossCorrelation.CorrelationCoefficient(
        NormalDist.Convert2ZScores(relativePeakHeights),
        NormalDist.Convert2ZScores(targetHeights));

    // PROCESS SCORES
    double combinedScore = (relativePeakScore * 0.4) + (peaksScore * 0.6);
    return new[] { combinedScore, relativePeakScore, peaksScore };
}
/// <summary>
/// Remove events whose acoustic profile does not match that of a flying fox.
/// </summary>
/// <remarks>
/// Each event is judged on the average spectrum (0 to 8 kHz) of the four frames that follow its first frame.
/// An event is kept only when the spectral maximum is below 4 kHz, no more than 0.1 of the total lies in the
/// 0-1 kHz band, and no more than 0.125 of the total lies in the 4-5 kHz band.
/// The hertz values are hard coded but could be included in the config.yml file.
/// </remarks>
/// <param name="events">unfiltered acoustic events.</param>
/// <param name="sonogram">includes matrix of spectrogram values.</param>
/// <returns>filtered acoustic events.</returns>
private static List<AcousticEvent> FilterEventsForSpectralProfile(List<AcousticEvent> events, BaseSonogram sonogram)
{
    double[,] spectrogramData = sonogram.Data;

    // frequency bins that demarcate the bands used in the spectral tests below
    int maxBin = (int)Math.Round(8000 / sonogram.FBinWidth);
    int fourKiloHzBin = (int)Math.Round(4000 / sonogram.FBinWidth);
    int oneKiloHzBin = (int)Math.Round(1000 / sonogram.FBinWidth);

    var accepted = new List<AcousticEvent>();

    foreach (AcousticEvent candidate in events)
    {
        // average spectrum of the four frames immediately after the event's first frame
        int firstFrame = candidate.Oblong.RowTop;
        var frames = DataTools.Submatrix(spectrogramData, firstFrame + 1, 0, firstFrame + 4, maxBin);
        var profile = DataTools.normalise(MatrixTools.GetColumnAverages(frames));
        profile = DataTools.filterMovingAverageOdd(profile, 11);
        double profileTotal = profile.Sum();

        // Test 1: Spectral maximum should be below 4 kHz.
        bool peakBelow4Khz = DataTools.GetMaxIndex(profile) < fourKiloHzBin;

        // Test 2: There should be little energy in 0-1 kHz band.
        // 0.125 = 1/8. i.e. energy in the 0-1 kHz band would be less than the average over all eight 1 kHz bands.
        // 0.0938 = 3/32. i.e. less than 3/4 of that average.
        // 0.0625 = 1/16. i.e. less than half of that average.
        double lowBandRatio = DataTools.Subarray(profile, 0, oneKiloHzBin).Sum() / profileTotal;
        bool quietBelow1Khz = !(lowBandRatio > 0.1);

        // Test 3: There should be little energy in 4-5 kHz band.
        double midBandRatio = DataTools.Subarray(profile, fourKiloHzBin, oneKiloHzBin).Sum() / profileTotal;
        bool quietAt4Khz = !(midBandRatio > 0.125);

        // TODO write method to determine similarity of spectrum to a true flying fox spectrum.
        // Problem: it is not certain how variable the FF spectra are.
        // In ten minutes of recording used so far, which include 14-15 obvious calls, there appear to be two spectral types.
        // One type has three peaks at around 1.5 kHz, 3 kHz and 6 kHz.
        // The other type have two peaks around 2.5 and 5.5 kHz.
        if (!(peakBelow4Khz && quietBelow1Khz && quietAt4Khz))
        {
            continue;
        }

        accepted.Add(candidate);
    }

    return accepted;
}
/// <summary>
/// Trains the network by passing the training signals through it repeatedly. Training stops when every
/// signal is assigned to the same output (F2) node as in the previous pass, or after maxIter passes.
/// </summary>
/// <param name="trainingData">the training signals, one vector per signal.</param>
/// <param name="maxIter">maximum number of passes over the training set.</param>
/// <param name="seed">seed used to randomise the order of the training set.</param>
/// <param name="initialWtCount">passed through to the weight array initialisation.</param>
/// <returns>
/// tuple of: number of passes performed, number of committed F2 nodes,
/// the winning output node for each signal, and the weights.
/// </returns>
public Tuple<int, int, int[], List<double[]>> TrainNet(List<double[]> trainingData, int maxIter, int seed, int initialWtCount)
{
    int signalCount = trainingData.Count;

    // randomised presentation order of the training set
    int[] shuffledOrder = RandomNumber.RandomizeNumberOrder(signalCount, seed);

    // winning OP node for each signal in the current pass and in the previous pass
    int[] currentWinners = new int[signalCount];
    int[] previousWinners = new int[signalCount];

    this.InitialiseWtArrays(trainingData, shuffledOrder, initialWtCount);

    int passCount = 0;
    bool converged = false;
    while (!converged && passCount < maxIter)
    {
        passCount++;

        // number of times each OP node wins during this pass
        int[] winsPerNode = new int[this.OPSize];

        // number of signals whose winning node differs from the previous pass
        int reassigned = 0;

        for (int position = 0; position < signalCount; position++)
        {
            // take signals in order unless a randomised order was requested
            int signalId = RandomiseTrnSetOrder ? shuffledOrder[position] : position;

            // pass one input signal through the network: output = AND divided by OR of two vectors
            double[] activations = this.PropagateIP2OP(trainingData[signalId]);
            int winner = DataTools.GetMaxIndex(activations);

            // create new category if similarity of the best matching node is too low
            if (activations[winner] < this.VigilanceRho)
            {
                this.ChangeWtsOfFirstUncommittedNode(trainingData[signalId]);
            }

            // the best matching node is recorded as the winner for this signal
            currentWinners[signalId] = winner;
            winsPerNode[winner]++;

            if (winner != previousWinners[signalId])
            {
                reassigned++;
            }
        }

        // the training set is learned when no signal changed its F2 node allocation
        converged = reassigned == 0;

        // remember this pass's allocations for the next comparison
        currentWinners.CopyTo(previousWinners, 0);

        // remove committed F2 nodes that are not having wins
        for (int node = 0; node < this.OPSize; node++)
        {
            if (winsPerNode[node] == 0 && this.committedNode[node])
            {
                this.committedNode[node] = false;
            }
        }

        if (Verbose)
        {
            LoggedConsole.WriteLine(" iter={0:D2} committed=" + this.CountCommittedF2Nodes() + "\t changedCategory=" + reassigned, passCount);
        }
    }

    return Tuple.Create(passCount, this.CountCommittedF2Nodes(), currentWinners, this.wts);
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Frames whose spectral maximum lies near the configured dominant frequency are scored as croaks,
/// croak events are extracted from those scores, and the resulting score array is then searched for
/// oscillations whose scores are converted into the returned events.
/// A debug spectrogram image is always written to the output directory.
/// </summary>
/// <param name="recording">the audio segment to analyse.</param>
/// <param name="configuration">the configuration from which the recognizer settings and species name are read.</param>
/// <param name="segmentStartOffset">start offset of this segment; stored on each event and passed to the event extraction.</param>
/// <param name="getSpectralIndexes">not used by this recognizer.</param>
/// <param name="outputDirectory">directory in which the debug spectrogram image is saved.</param>
/// <param name="imageWidth">not used by this recognizer.</param>
/// <returns>results holding the sonogram, the oscillation score plot and the detected events; Hits is always null.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaCaeruleaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";

    // BETTER TO SET THESE. IGNORE USER!
    // This framesize is large because the oscillation we wish to detect is due to repeated croaks
    // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
    const int frameSize = 2048;
    const double windowOverlap = 0.5;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // The default HAMMING window is used.
        // If noise reduction is not used, a more sensitive recogniser can be obtained.
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 9.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // get the freq band as set by min and max Herz
    var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
    // Skip over spectra where maximum is not in correct location.
    int buffer = 150;
    var croakScoreArray = new double[rowCount];
    var hzAtTopOfTopBand = recognizerConfig.DominantFreq + buffer;
    var hzAtBotOfTopBand = recognizerConfig.DominantFreq - buffer;
    var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
    var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

    // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
    for (int x = 0; x < rowCount; x++)
    {
        // extract spectrum
        var spectrum = MatrixTools.GetRow(frogBand, x);
        int maxIndex = DataTools.GetMaxIndex(spectrum);
        if (spectrum[maxIndex] < decibelThreshold)
        {
            continue;
        }

        if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
        {
            croakScoreArray[x] = spectrum[maxIndex];
        }
    }

    // Prepare a normalised plot for later display with spectrogram
    double[] normalisedScores;
    double normalisedThreshold;
    DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var text1 = $"Croak scores (threshold={decibelThreshold})";
    var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

    // extract potential croak events from the array of croak candidate
    var croakEvents = AcousticEvent.ConvertScoreArray2Events(
        croakScoreArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinCroakDuration,
        recognizerConfig.MaxCroakDuration,
        segmentStartOffset);

    // add necessary info into the candidate events
    var prunedEvents = AddSpeciesAndSegmentInfo(
        croakEvents,
        speciesName,
        recognizerConfig.AbbreviatedSpeciesName,
        segmentStartOffset.TotalSeconds,
        recordingDuration.TotalSeconds);

    // With those events that survive the above Array2Events process, we now extract a new array croak scores
    croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
    DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var text2 = $"Croak events (threshold={decibelThreshold})";
    var croakPlot2 = new Plot(text2, normalisedScores, normalisedThreshold);

    // Look for oscillations in the croak score array.
    // duration of DCT in seconds
    double dctDuration = recognizerConfig.DctDuration;

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = recognizerConfig.DctThreshold;
    double minOscRate = 1 / recognizerConfig.MaxPeriod;
    double maxOscRate = 1 / recognizerConfig.MinPeriod;
    var dctScores = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

    // ######################################################################
    // iii: CONVERT THE OSCILLATION SCORES TO EVENTS
    var events = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    // no hits matrix is produced by this recognizer
    double[,] hits = null;
    prunedEvents = AddSpeciesAndSegmentInfo(
        events,
        speciesName,
        recognizerConfig.AbbreviatedSpeciesName,
        segmentStartOffset.TotalSeconds,
        recordingDuration.TotalSeconds);

    var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

    // Display a variety of debug score arrays. This image is always produced.
    // calculate amplitude at location
    double[] amplitudeArray = MatrixTools.SumRows(frogBand);
    DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);
    var debugPlots = new List<Plot> { scoresPlot, croakPlot2, croakPlot1, amplPlot };

    // NOTE: This DrawDebugImage() method can be over-written in this class.
    var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
    var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
    debugImage.Save(debugPath);

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = scoresPlot.AsList(),
        Events = prunedEvents,
    };
}

/// <summary>
/// Stamps species and segment information on every event and returns the events as a new list.
/// </summary>
/// <param name="events">the events to annotate; each event is modified in place.</param>
/// <param name="speciesName">value assigned to each event's SpeciesName.</param>
/// <param name="eventName">value assigned to each event's Name.</param>
/// <param name="segmentStartSeconds">value assigned to each event's SegmentStartSeconds.</param>
/// <param name="segmentDurationSeconds">value assigned to each event's SegmentDurationSeconds.</param>
/// <returns>a new list holding the same event instances in the same order.</returns>
private static List<AcousticEvent> AddSpeciesAndSegmentInfo(IEnumerable<AcousticEvent> events, string speciesName, string eventName, double segmentStartSeconds, double segmentDurationSeconds)
{
    var annotated = new List<AcousticEvent>();
    foreach (var ae in events)
    {
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartSeconds;
        ae.SegmentDurationSeconds = segmentDurationSeconds;
        ae.Name = eventName;
        annotated.Add(ae);
    }

    return annotated;
}