} // GetSpectralMaxima()

/// <summary>
/// Original note: this method is called only from the Frogs.cs class.
/// For every frame whose envelope peak value reaches the threshold, scans the surrounding frames and records
/// which frequency bin holds the largest value in each of them.
/// Frames that are not in the neighbourhood of such a peak keep the default value 0.
/// </summary>
/// <param name="decibelsPerFrame">Decibel envelope, one value per frame; passed to DataTools.GetPeakValues.</param>
/// <param name="spectrogram">Spectrogram matrix: rows are frames, columns are frequency bins.</param>
/// <param name="threshold">Minimum value for both the envelope peak and the spectral maximum.</param>
/// <param name="nhLimit">Half-width (in frames) of the neighbourhood searched around each envelope peak.</param>
/// <returns>Item1: index of the maximum bin per frame. Item2: hits matrix holding 1.0 at each recorded (frame, bin).</returns>
public static Tuple<int[], double[,]> GetSpectralMaxima(double[] decibelsPerFrame, double[,] spectrogram, double threshold, int nhLimit)
{
    int frameCount = spectrogram.GetLength(0);
    int binCount = spectrogram.GetLength(1);
    var envelopePeaks = DataTools.GetPeakValues(decibelsPerFrame);

    // one element per frame: the frequency bin with maximum amplitude.
    var maxFreqArray = new int[frameCount];
    var hitsMatrix = new double[frameCount, binCount];

    for (int centre = nhLimit; centre < frameCount - nhLimit; centre++)
    {
        if (envelopePeaks[centre] < threshold)
        {
            continue;
        }

        // NOTE(review): the neighbourhood covers centre-nhLimit .. centre+nhLimit-1, i.e. it excludes the
        // frame at +nhLimit (and is empty when nhLimit == 0). Confirm whether a symmetric window was intended.
        int stopFrame = centre + nhLimit;
        for (int frame = centre - nhLimit; frame < stopFrame; frame++)
        {
            double[] spectrum = MatrixTools.GetRow(spectrogram, frame);

            // zero the DC bin so that it can never be chosen as the maximum.
            spectrum[0] = 0.0;
            int peakBin = DataTools.GetMaxIndex(spectrum);

            // only record the spectral peak when it is above threshold.
            if (spectrum[peakBin] > threshold)
            {
                maxFreqArray[frame] = peakBin;
                hitsMatrix[frame, peakBin] = 1.0;
            }
        }
    }

    return Tuple.Create(maxFreqArray, hitsMatrix);
} // GetSpectralMaxima()
/// <summary>
/// Reduces the number of columns in a matrix. The first 100 columns are copied unchanged;
/// from output column 100 onwards every second source column is taken (source index = 2c - 100).
/// NOTE(review): despite the method name no maximum is computed - the upper columns are sub-sampled.
/// </summary>
/// <param name="matrix">Source matrix; rows are preserved.</param>
/// <param name="reducedColCount">Number of columns in the returned matrix.</param>
/// <returns>A new matrix with the same row count and <paramref name="reducedColCount"/> columns.</returns>
/// <exception cref="ArgumentNullException">The matrix is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The requested column count is negative.</exception>
/// <exception cref="ArgumentException">The source matrix has too few columns for the requested reduction.</exception>
public static double[,] ExoticMaxPoolingMatrixColumns(double[,] matrix, int reducedColCount)
{
    // number of leading columns that are copied without any reduction.
    const int unpooledColCount = 100;

    if (matrix == null)
    {
        throw new ArgumentNullException(nameof(matrix));
    }

    if (reducedColCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(reducedColCount), "Column count cannot be negative.");
    }

    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);

    // The last output column c = reducedColCount - 1 reads source column 2c - unpooledColCount.
    int requiredCols = reducedColCount <= unpooledColCount
        ? reducedColCount
        : (2 * reducedColCount) - unpooledColCount - 1;
    if (rows > 0 && cols < requiredCols)
    {
        throw new ArgumentException($"Matrix requires at least {requiredCols} columns but has {cols}.", nameof(matrix));
    }

    // previously the first loop always ran to 100 and threw when fewer columns were requested.
    int copiedColCount = Math.Min(unpooledColCount, reducedColCount);
    var returnMatrix = new double[rows, reducedColCount];
    for (int r = 0; r < rows; r++)
    {
        for (int c = 0; c < copiedColCount; c++)
        {
            returnMatrix[r, c] = matrix[r, c];
        }

        // reduce the remainder of the row by a factor of two.
        for (int c = unpooledColCount; c < reducedColCount; c++)
        {
            returnMatrix[r, c] = matrix[r, (2 * c) - unpooledColCount];
        }
    }

    return returnMatrix;
}
/// <summary>
/// Normalises, in place, every row of the species feature matrix and then every row of the instance feature matrix.
/// The normalisation of a single row is delegated to NormaliseVector, which also receives output.Weights.
/// (Original note: normalisation can be done in three ways, i.e. (i) unit length, (ii) unit area, (iii) unit bounds 0..1.)
/// </summary>
/// <param name="arguments">Supplies SpeciesCount and InstanceCount, the number of rows to process in each matrix.</param>
/// <param name="output">Holds SpeciesFeatureMatrix, InstanceFeatureMatrix and Weights; the two matrices are overwritten.</param>
public static void Normalise(Arguments arguments, Output output)
{
    // Shared row-wise normalisation; the species and instance matrices were previously handled by two identical loops.
    void NormaliseRows(double[,] featureMatrix, int rowCount)
    {
        for (int r = 0; r < rowCount; r++)
        {
            double[] ipVector = MatrixTools.GetRow(featureMatrix, r);
            double[] normedVector = NormaliseVector(ipVector, output.Weights);
            for (int c = 0; c < normedVector.Length; c++)
            {
                featureMatrix[r, c] = normedVector[c];
            }
        }
    }

    int speciesCount = arguments.SpeciesCount;
    int instanceCount = arguments.InstanceCount;

    // all species first, then all instances
    NormaliseRows(output.SpeciesFeatureMatrix, speciesCount);
    NormaliseRows(output.InstanceFeatureMatrix, instanceCount);
}
/// <summary>
/// Detects harmonics in the sub-band of a spectrogram.
/// The matrix is assumed to be a rotated spectrogram, i.e. each matrix row is one spectrum (time frame).
/// Developed for the GenericRecognizer of harmonics.
/// WARNING: as of March 2020 each spectrum is the average of five adjacent frames, which reduces noise
/// but requires that the frequency of any potential formants is not changing rapidly.
/// This may not be suitable for detecting human speech, although the frame step can be reduced.
/// The first two and last two frames are not analysed and keep zero values in all three arrays.
/// </summary>
/// <param name="m">spectrogram data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double dBThreshold)
{
    const int lowerDctBound = 2;
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    // cosine coefficients for the DCT
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // per-frame results: decibels, formant intensity and index of the maximum DCT coefficient.
    var dBArray = new double[frameCount];
    var intensity = new double[frameCount];
    var maxIndexArray = new int[frameCount];

    for (int t = 2; t < frameCount - 2; t++)
    {
        // average the frame with its two neighbours on either side
        var twoBack = MatrixTools.GetRow(m, t - 2);
        var oneBack = MatrixTools.GetRow(m, t - 1);
        var centre = MatrixTools.GetRow(m, t);
        var oneAhead = MatrixTools.GetRow(m, t + 1);
        var twoAhead = MatrixTools.GetRow(m, t + 2);
        var averaged = new double[binCount];
        for (int bin = 0; bin < binCount; bin++)
        {
            averaged[bin] = (twoBack[bin] + oneBack[bin] + centre[bin] + oneAhead[bin] + twoAhead[bin]) / 5;
        }

        double loudest = averaged.Max();
        dBArray[t] = loudest;
        if (loudest < dBThreshold)
        {
            continue;
        }

        // xr has twice the length of the frame and is symmetrical; only the first half is required.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(averaged);

        // The xcorr values would normally be normalised for overlap count (xr[i] / (binCount - i)).
        // For harmonics that introduces too much noise - the less overlapped values need less weight.
        var firstHalf = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            firstHalf[lag] = xr[lag];
        }

        // DCT across the auto-correlation
        var dctCoefficients = Oscillations2012.DoDct(firstHalf, cosines, lowerDctBound);
        int strongest = DataTools.GetMaxIndex(dctCoefficients);
        intensity[t] = dctCoefficients[strongest];
        maxIndexArray[t] = strongest;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
/// <summary>
/// Generates an artificial recording containing five cosine harmonics, builds an amplitude spectrogram of it on a
/// linear frequency scale, saves the annotated image and asserts the image size and the bins of the spectral peaks.
/// </summary>
public void TestFreqScaleOnArtificialSignal1()
{
    const int sampleRate = 22050;
    const int windowSize = 512;
    double duration = 20; // signal duration in seconds
    int[] harmonics = { 500, 1000, 2000, 4000, 8000 };

    var freqScale = new FrequencyScale(sampleRate / 2, windowSize, 1000);
    var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal1_LinearFreqScale.png");
    var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.0,
        SourceFName = "Signal1",
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.12,
    };
    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

    // pick a row, any row - then smooth it before looking for peaks
    var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
    var peaks = DataTools.GetPeaks(smoothedSpectrum);

    // report the located peaks, ignoring five bins at either end
    int lastBin = peaks.Length - 5;
    for (int bin = 5; bin < lastBin; bin++)
    {
        if (!peaks[bin])
        {
            continue;
        }

        LoggedConsole.WriteLine($"bin ={freqScale.BinBounds[bin, 0]}, Herz={freqScale.BinBounds[bin, 1]}-{freqScale.BinBounds[bin + 1, 1]} ");
    }

    foreach (int harmonic in harmonics)
    {
        LoggedConsole.WriteLine($"Harmonic {harmonic}Herz should be in bin {freqScale.GetBinIdForHerzValue(harmonic)}");
    }

    // spectrogram without framing, annotation etc
    var image = sonogram.GetImage();
    string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={windowSize}";
    image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // Check that image dimensions are correct
    Assert.AreEqual(861, image.Width);
    Assert.AreEqual(310, image.Height);

    // Check that a peak was found in the bin expected for each harmonic
    foreach (int expectedPeakBin in new[] { 11, 22, 45, 92, 185 })
    {
        Assert.IsTrue(peaks[expectedPeakBin]);
    }
}
/// <summary>
/// Produces a CONFUSION MATRIX and a RANK ORDER MATRIX from output.SimilarityScores and writes a summary to the console.
/// ConfusionMatrix[correct, predicted] counts instances; RankOrderMatrix[instance, rank] is set to 1 at the rank
/// (0 .. 9) at which the correct species appears among the highest scores, if it appears there at all.
/// </summary>
/// <param name="arguments">Supplies SpeciesCount and InstanceCount.</param>
/// <param name="output">Supplies SpeciesID (1-based labels) and SimilarityScores; receives ConfusionMatrix and RankOrderMatrix.</param>
public static void CalculateAccuracy(Arguments arguments, Output output)
{
    const int maxRank = 10;
    int speciesCount = arguments.SpeciesCount;
    int instanceCount = arguments.InstanceCount;
    output.ConfusionMatrix = new int[speciesCount, speciesCount];
    output.RankOrderMatrix = new int[instanceCount, maxRank];

    // loop through all instances
    for (int r = 0; r < instanceCount; r++)
    {
        // species labels are 1-based, matrix indices are 0-based
        int correctID = output.SpeciesID[r] - 1;
        double[] instanceScores = MatrixTools.GetRow(output.SimilarityScores, r);
        int maxID = DataTools.GetMaxIndex(instanceScores);
        output.ConfusionMatrix[correctID, maxID]++;

        // calculate rank order matrix.
        if (maxID == correctID)
        {
            // The correct species is already ranked first. Stop here: continuing the search could mark a second
            // rank for the same instance once every score has been zeroed (e.g. when speciesCount < maxRank).
            output.RankOrderMatrix[r, 0] = 1;
            continue;
        }

        // Remove the current best candidate and look for the next best.
        // NOTE(review): zeroing only removes a candidate if the remaining scores are positive - confirm that
        // similarity scores are never negative.
        instanceScores[maxID] = 0.0;
        for (int rank = 1; rank < maxRank; rank++)
        {
            maxID = DataTools.GetMaxIndex(instanceScores);
            if (maxID == correctID)
            {
                output.RankOrderMatrix[r, rank] = 1;
                break;
            }

            instanceScores[maxID] = 0.0;
        }
    } // end for loop r over all instances

    int diagonalSum = 0;
    for (int r = 0; r < speciesCount; r++)
    {
        diagonalSum += output.ConfusionMatrix[r, r];
    }

    // floating point division: integer division previously truncated the percentage.
    double accuracy = 100.0 * diagonalSum / instanceCount;
    LoggedConsole.WriteLine("Diagonal Sum = " + diagonalSum);
    LoggedConsole.WriteLine($"% Accuracy = {accuracy:f2}");
    LoggedConsole.WriteLine("% Rank");
    for (int rank = 0; rank < maxRank; rank++)
    {
        var colSum = MatrixTools.SumColumn(output.RankOrderMatrix, rank);
        double acc = 100 * colSum / (double)instanceCount;
        string str = string.Format("{0} % Acc = {1:f2}", rank, acc);
        LoggedConsole.WriteLine(str);
    }
}
/// <summary>
/// Manual test: generates a recording of five cosine harmonics, reports the spectral peaks found in one frame of its
/// amplitude spectrogram, and saves the annotated spectrogram image to a hard-coded path on the C: drive.
/// </summary>
public static void TestMethod_GenerateSignal1()
{
    const int sampleRate = 22050;
    const int windowSize = 512;
    double duration = 20; // signal duration in seconds
    int[] harmonics = { 500, 1000, 2000, 4000, 8000 };

    var freqScale = new FrequencyScale(sampleRate / 2, windowSize, 1000);
    string path = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\SineSignal1.png";
    var recording = GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.0,
        SourceFName = "Signal1",
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.12,
    };
    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

    // pick a row, any row - normalise it and locate the peaks
    var normalisedSpectrum = DataTools.normalise(MatrixTools.GetRow(sonogram.Data, 40));
    var peaks = DataTools.GetPeaks(normalisedSpectrum, 0.5);

    // report the located peaks, ignoring two bins at either end
    int lastBin = peaks.Length - 2;
    for (int bin = 2; bin < lastBin; bin++)
    {
        if (!peaks[bin])
        {
            continue;
        }

        LoggedConsole.WriteLine($"bin ={freqScale.BinBounds[bin, 0]}, Herz={freqScale.BinBounds[bin, 1]}-{freqScale.BinBounds[bin + 1, 1]} ");
    }

    // the five harmonics are expected to produce peaks in these bins
    bool peaksAreCorrect = peaks[11] && peaks[22] && peaks[45] && peaks[92] && peaks[185];
    if (!peaksAreCorrect)
    {
        LoggedConsole.WriteErrorLine("Spectral Peaks found at INCORRECT places");
    }
    else
    {
        LoggedConsole.WriteSuccessLine("Spectral Peaks found at correct places");
    }

    foreach (int harmonic in harmonics)
    {
        LoggedConsole.WriteLine($"Harmonic {harmonic}Herz should be in bin {freqScale.GetBinIdForHerzValue(harmonic)}");
    }

    // spectrogram without framing, annotation etc
    var image = sonogram.GetImage();
    string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={windowSize}";
    image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations);
    image.Save(path);
}
/// <summary>
/// Reads one csv matrix per name in ContentSignatures.IndexNames and combines them into a single matrix,
/// each row of which is the combined feature vector for one minute of the requested time span.
/// The values of index i occupy columns [i * colCount, (i + 1) * colCount) of the returned matrix.
/// All index matrices are assumed to have the same dimensions as the first one.
/// </summary>
/// <param name="dir">Directory containing the index csv files.</param>
/// <param name="baseName">File name prefix; files are named baseName + "__Towsey.Acoustic." + key + ".csv".</param>
/// <param name="startTime">Start of the required time span; truncated to whole minutes.</param>
/// <param name="duration">Length of the required time span; truncated to whole minutes.</param>
/// <returns>A matrix with one row per minute of the span and (index count x frequency bin count) columns.</returns>
/// <exception cref="ArgumentOutOfRangeException">The first index matrix has fewer rows than the end minute of the span.</exception>
public static double[,] ReadSpectralIndicesFromIndexMatrices(DirectoryInfo dir, string baseName, TimeSpan startTime, TimeSpan duration)
{
    // constructs the path to the csv matrix of one index
    FileInfo GetIndexFile(string indexKey)
    {
        return new FileInfo(Path.Combine(dir.FullName, baseName + "__Towsey.Acoustic." + indexKey + ".csv"));
    }

    //get start and end minutes
    int startMinute = (int)startTime.TotalMinutes;
    int minuteSpan = (int)duration.TotalMinutes;
    int endMinute = startMinute + minuteSpan;

    // read the first matrix to see what size data we are dealing with.
    var indexNames = ContentSignatures.IndexNames;
    var indexMatrix = Csv.ReadMatrixFromCsv<double>(GetIndexFile(indexNames[0]));
    var rowCount = indexMatrix.GetLength(0);
    var colCount = indexMatrix.GetLength(1);

    if (rowCount < endMinute)
    {
        throw new ArgumentOutOfRangeException(string.Empty, "Not enough rows in matrix to read the given timespan.");
    }

    // set up the return Matrix
    // indexCount will be number of indices X number of frequency bins
    var indexCount = indexNames.Length * colCount;
    var opMatrix = new double[minuteSpan, indexCount];

    // Start at index 0: the first matrix was previously read only for its dimensions and never copied.
    for (int i = 0; i < indexNames.Length; i++)
    {
        if (i > 0)
        {
            // the matrix for index 0 is already in memory
            indexMatrix = Csv.ReadMatrixFromCsv<double>(GetIndexFile(indexNames[i]));
        }

        // Copy only the requested minutes: source row (startMinute + r) goes to output row r.
        // Looping over every source row overran the output matrix whenever rowCount > minuteSpan.
        int startColumn = colCount * i;
        for (int r = 0; r < minuteSpan; r++)
        {
            for (int c = 0; c < colCount; c++)
            {
                opMatrix[r, startColumn + c] = indexMatrix[startMinute + r, c];
            }
        }
    }

    return opMatrix;
}
/// <summary>
/// Extracts a single row from every matrix in a dictionary of index matrices.
/// </summary>
/// <param name="allIndices">Index matrices stored by key.</param>
/// <param name="rowId">The row to take from each matrix.</param>
/// <returns>A new dictionary with the same keys, each mapped to row <paramref name="rowId"/> of its matrix.</returns>
public static Dictionary<string, double[]> GetIndicesForOneMinute(Dictionary<string, double[,]> allIndices, int rowId)
{
    var rowsByKey = new Dictionary<string, double[]>();
    foreach (KeyValuePair<string, double[,]> entry in allIndices)
    {
        rowsByKey.Add(entry.Key, MatrixTools.GetRow(entry.Value, rowId));
    }

    return rowsByKey;
}
} // GetInstanceRepresentations()

/// <summary>
/// Builds a feature representation for every species by summing the feature vectors of all instances that carry
/// that species label, and totals the frame numbers of those instances.
/// The feature sums are not divided by the instance count here.
/// </summary>
/// <param name="arguments">Supplies InstanceCount and SpeciesCount.</param>
/// <param name="output">Supplies InstanceFeatureMatrix, SpeciesID (1-based labels) and FrameNumbersPerInstance;
/// receives SpeciesFeatureMatrix and FrameNumbersPerSpecies.</param>
public static void GetSpeciesRepresentations(Arguments arguments, Output output)
{
    LoggedConsole.WriteLine("\n\n2a. Obtain feature representation of every species.");
    int instanceCount = arguments.InstanceCount;
    int speciesCount = arguments.SpeciesCount;
    double[,] instanceFeatures = output.InstanceFeatureMatrix;
    int featureCount = instanceFeatures.GetLength(1);

    // initialise species description matrix
    double[,] speciesFeatureMatrix = new double[speciesCount, featureCount];
    int[] frameNumbersPerSpecies = new int[speciesCount];

    // loop through all species
    for (int i = 0; i < speciesCount; i++)
    {
        // species labels are 1-based
        int speciesLabel = i + 1;
        LoggedConsole.Write(" " + speciesLabel);

        // loop through all instances multiple times - once for each species
        for (int j = 0; j < instanceCount; j++)
        {
            if (output.SpeciesID[j] != speciesLabel)
            {
                continue;
            }

            // aggregate the instance feature values, reading the matrix directly rather than copying the row
            for (int c = 0; c < featureCount; c++)
            {
                speciesFeatureMatrix[i, c] += instanceFeatures[j, c];
            }

            frameNumbersPerSpecies[i] += output.FrameNumbersPerInstance[j];
        } // end for loop j over all instances
    } // loop through all species

    LoggedConsole.WriteLine(" Done");

    output.SpeciesFeatureMatrix = speciesFeatureMatrix;
    output.FrameNumbersPerSpecies = frameNumbersPerSpecies;
} // GetSpeciesRepresentations()
/// <summary>
/// Detects harmonics in the sub-band of a sonogram, one frame at a time (no averaging over adjacent frames).
/// The matrix is assumed to be a rotated spectrogram, i.e. each matrix row is one spectrum (time frame).
/// Developed for the GenericRecognizer of harmonics.
/// Frames whose maximum value is below the threshold keep zero intensity and zero max index.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <returns>three arrays: the maximum value per frame, the formant intensity per frame and the index of the
/// maximum DCT coefficient per frame.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold)
{
    const int lowerDctBound = 2;
    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);

    var dBArray = new double[frameCount];
    var intensity = new double[frameCount]; // formant intensity
    var maxIndexArray = new int[frameCount]; // index of the maximum DCT coefficient

    // cosine coefficients for the DCT
    double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

    // for all time frames = rows of matrix
    for (int t = 0; t < frameCount; t++)
    {
        var spectrum = MatrixTools.GetRow(m, t);
        double loudest = spectrum.Max();
        dBArray[t] = loudest;
        if (loudest < dBThreshold)
        {
            continue;
        }

        // xr has twice the length of the frame and is symmetrical, so only the first half is required.
        double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(spectrum);

        // normalise each lag by its overlap count
        var normalised = new double[binCount];
        for (int lag = 0; lag < binCount; lag++)
        {
            normalised[lag] = xr[lag] / (binCount - lag);
        }

        // DCT across the auto-correlation
        var dctCoefficients = Oscillations2012.DoDct(normalised, cosines, lowerDctBound);
        int strongest = DataTools.GetMaxIndex(dctCoefficients);
        intensity[t] = dctCoefficients[strongest];
        maxIndexArray[t] = strongest;
    }

    return Tuple.Create(dBArray, intensity, maxIndexArray);
}
} // GetSpeciesRepresentations()

/// <summary>
/// Draws, for every species, one graph per feature key and saves the graphs stacked vertically as
/// "Species{n}.SpectralFeatures.png" in the output directory.
/// Each species feature vector is treated as consecutive segments of output.ReducedSpectralLength values,
/// one segment per key in FEATURE_KEYS.
/// </summary>
/// <param name="arguments">Supplies SpeciesCount and OutputDirectory.</param>
/// <param name="output">Supplies SpeciesFeatureMatrix, ReducedSpectralLength and InstanceNumbersPerSpecies.</param>
public static void DrawSpeciesImages(Arguments arguments, Output output)
{
    LoggedConsole.WriteLine("2b. Draw feature representation of every species.");
    int scalingFactor = 20;
    int imageHeight = 100;

    int speciesCount = arguments.SpeciesCount;
    var keyArray = FEATURE_KEYS.Split(',');

    // loop through all species
    for (int r = 0; r < speciesCount; r++)
    {
        double[] ipVector = MatrixTools.GetRow(output.SpeciesFeatureMatrix, r);

        // now make images
        var images = new List<Image>();
        Image combinedImage = null;
        try
        {
            int featureID = 0;
            foreach (string key in keyArray)
            {
                // cut this feature's segment out of the combined feature vector
                double[] vector = new double[output.ReducedSpectralLength];
                int featureOffset = featureID * output.ReducedSpectralLength;
                for (int c = 0; c < output.ReducedSpectralLength; c++)
                {
                    vector[c] = ipVector[featureOffset + c];
                }

                featureID++;

                vector = DataTools.Normalise2Probabilites(vector);
                vector = DataTools.filterMovingAverage(vector, 3);
                string label = string.Format("{0} {1} ({2})", r + 1, key, output.InstanceNumbersPerSpecies[r]);
                Image image = GraphsAndCharts.DrawGraph(label, vector, output.ReducedSpectralLength, imageHeight, scalingFactor);
                images.Add(image);
            }

            combinedImage = ImageTools.CombineImagesVertically(images);
            string outputFileName = string.Format("Species{0}.SpectralFeatures.png", r + 1);
            string path = Path.Combine(arguments.OutputDirectory.FullName, outputFileName);
            combinedImage.Save(path);
        }
        finally
        {
            // Images were previously never released. Dispose only after the combined image has been saved,
            // in case the combined image shares data with its component images.
            combinedImage?.Dispose();
            foreach (Image image in images)
            {
                image.Dispose();
            }
        }
    } // loop through all species
}
} //DetectBarsInTheRowsOfaMatrix()

/// <summary>
/// Detects harmonics in the rows of the passed portion of a sonogram.
/// The matrix is assumed to be a rotated spectrogram, i.e. each matrix row is one spectrum (time frame).
/// Was first developed for crow calls.
/// Every frame whose smoothed average decibel value reaches the threshold is cross-correlated with itself;
/// the position of the maximum of the area-normalised result gives the harmonic period.
/// NOTE(review): the original description mentions matching a decibel profile to the call duration,
/// but <paramref name="callSpan"/> is not used anywhere in the method body.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="dBThreshold">Minimum sound level.</param>
/// <param name="callSpan">Minimum length of call of interest. Currently unused.</param>
/// <returns>a tuple of three arrays, one value per frame: smoothed decibel averages, period intensity and periodicity.</returns>
public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
{
    // low frequency bins to zero; in real data they dominate and hide other frequency content.
    const int zeroBinCount = 3;

    int frameCount = m.GetLength(0);
    int binCount = m.GetLength(1);
    var intensity = new double[frameCount];
    var periodicity = new double[frameCount];

    // smoothed average decibel value of every frame
    double[] dBArray = DataTools.filterMovingAverage(MatrixTools.GetRowAverages(m), 3);

    // for all time frames
    for (int t = 0; t < frameCount; t++)
    {
        if (dBArray[t] < dBThreshold)
        {
            continue;
        }

        var frame = MatrixTools.GetRow(m, t);
        var spectrum = AutoAndCrossCorrelation.CrossCorr(frame, frame);
        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);
        intensity[t] = spectrum[peakBin];

        // a maximum in bin zero means that no period could be determined
        periodicity[t] = peakBin == 0 ? 0.0 : 2 * binCount / (double)peakBin;
    }

    return Tuple.Create(dBArray, intensity, periodicity);
}
/// <summary>
/// Calculates a similarity score between every instance and every species and stores the scores in
/// output.SimilarityScores[instance, species].
/// The score is the dot product of the two feature vectors; this equals the cosine similarity only if the
/// vectors have already been normalised to unit length. Euclidian distance could also be used.
/// </summary>
/// <param name="arguments">Supplies SpeciesCount and InstanceCount.</param>
/// <param name="output">Supplies InstanceFeatureMatrix and SpeciesFeatureMatrix; receives SimilarityScores.</param>
public static void CalculateSimilarityScores(Arguments arguments, Output output)
{
    int speciesCount = arguments.SpeciesCount;
    int instanceCount = arguments.InstanceCount;
    output.SimilarityScores = new double[instanceCount, speciesCount];
    if (instanceCount == 0)
    {
        // nothing to score; as before, the species matrix is not touched in this case.
        return;
    }

    // Extract the species vectors once; they were previously re-extracted for every instance.
    var speciesVectors = new double[speciesCount][];
    for (int s = 0; s < speciesCount; s++)
    {
        speciesVectors[s] = MatrixTools.GetRow(output.SpeciesFeatureMatrix, s);
    }

    // loop through all instances
    for (int r = 0; r < instanceCount; r++)
    {
        double[] instance = MatrixTools.GetRow(output.InstanceFeatureMatrix, r);
        for (int s = 0; s < speciesCount; s++)
        {
            output.SimilarityScores[r, s] = DataTools.DotProduct(instance, speciesVectors[s]);
        }
    } // end for loop r over all instances
}
/// <summary>
/// Applies the vector overload of MaxPoolingLimited to every row of a matrix and collects the first
/// <paramref name="reducedBinCount"/> values of each pooled row into a new matrix.
/// </summary>
/// <param name="M">Source matrix; each row is pooled independently.</param>
/// <param name="startBin">Passed unchanged to the vector overload.</param>
/// <param name="maxOf2Bin">Passed unchanged to the vector overload.</param>
/// <param name="maxOf3Bin">Passed unchanged to the vector overload.</param>
/// <param name="endBin">Passed unchanged to the vector overload.</param>
/// <param name="reducedBinCount">Number of columns in the returned matrix.</param>
/// <returns>A matrix with the same number of rows as <paramref name="M"/> and <paramref name="reducedBinCount"/> columns.</returns>
public static double[,] MaxPoolingLimited(double[,] M, int startBin, int maxOf2Bin, int maxOf3Bin, int endBin, int reducedBinCount)
{
    int rowCount = M.GetLength(0);
    var pooledMatrix = new double[rowCount, reducedBinCount];

    for (int row = 0; row < rowCount; row++)
    {
        double[] pooledRow = MaxPoolingLimited(MatrixTools.GetRow(M, row), startBin, maxOf2Bin, maxOf3Bin, endBin);
        for (int bin = 0; bin < reducedBinCount; bin++)
        {
            pooledMatrix[row, bin] = pooledRow[bin];
        }
    }

    return pooledMatrix;
}
/// <summary>
/// Converts a spectrogram having linear freq scale to one having an Octave freq scale.
/// Each frame (matrix row) is converted by OctaveSpectrum using the bin bounds of the passed frequency scale.
/// Note that the sample rate (sr) and the frame size both need to be appropriate to the choice of FreqScaleType.
/// TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method.
/// </summary>
/// <param name="inputSpgram">Linear spectrogram: rows are frames, columns are frequency bins.</param>
/// <param name="freqScale">The octave frequency scale; must not be of type FreqScaleType.Linear.</param>
/// <returns>A matrix with one row per input frame and one column per row of freqScale.BinBounds.</returns>
/// <exception cref="ArgumentNullException">An argument is null, or the frequency scale is linear.</exception>
public static double[,] ConvertLinearSpectrogramToOctaveFreqScale(double[,] inputSpgram, FrequencyScale freqScale)
{
    if (freqScale == null)
    {
        throw new ArgumentNullException(nameof(freqScale));
    }

    if (freqScale.ScaleType == FreqScaleType.Linear)
    {
        // The exception type is retained for callers that already catch it (ArgumentException would describe
        // the failure better), but it now carries the reason instead of claiming that the argument is null.
        const string message = "Linear Hz Scale is not valid for this Octave method.";
        LoggedConsole.WriteLine(message);
        throw new ArgumentNullException(nameof(freqScale), message);
    }

    if (inputSpgram == null)
    {
        throw new ArgumentNullException(nameof(inputSpgram));
    }

    // get the octave bin bounds for this octave scale type
    var octaveBinBounds = freqScale.BinBounds;
    int newBinCount = octaveBinBounds.GetLength(0);

    // set up the new octave spectrogram
    int frameCount = inputSpgram.GetLength(0);
    double[,] octaveSpectrogram = new double[frameCount, newBinCount];

    for (int row = 0; row < frameCount; row++)
    {
        //get each frame or spectrum in turn
        var linearSpectrum = MatrixTools.GetRow(inputSpgram, row);

        // convert the spectrum to its octave form
        var octaveSpectrum = OctaveSpectrum(octaveBinBounds, linearSpectrum);

        //return the spectrum to output spectrogram.
        MatrixTools.SetRow(octaveSpectrogram, row, octaveSpectrum);
    }

    return octaveSpectrogram;
}
/// <summary>
/// Cross-correlates every row of the matrix (after subtracting its mean) with the preceding row and derives
/// a period from the position of the maximum of the area-normalised result.
/// The matrix is assumed to be a rotated spectrogram, i.e. each matrix row is one spectrum (time frame).
/// Row 0 has no predecessor, so its intensity and periodicity remain zero.
/// </summary>
/// <param name="m">data matrix.</param>
/// <param name="threshold">Not used by the method body.</param>
/// <param name="zeroBinCount">Number of leading bins of the cross-correlation result that are set to zero.</param>
/// <returns>Two arrays with one value per row: period intensity and periodicity.</returns>
public static Tuple<double[], double[]> DetectBarsInTheRowsOfaMatrix(double[,] m, double threshold, int zeroBinCount)
{
    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    var intensity = new double[rowCount];
    var periodicity = new double[rowCount];

    double[] previous = DataTools.DiffFromMean(MatrixTools.GetRow(m, 0));
    for (int r = 1; r < rowCount; r++)
    {
        double[] current = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
        var spectrum = AutoAndCrossCorrelation.CrossCorr(previous, current);

        // in real data these bins are dominant and hide other frequency content
        for (int bin = 0; bin < zeroBinCount; bin++)
        {
            spectrum[bin] = 0.0;
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int peakBin = DataTools.GetMaxIndex(spectrum);
        intensity[r] = spectrum[peakBin];

        // a maximum in bin zero means that no period could be determined
        periodicity[r] = peakBin == 0 ? 0.0 : 2 * colCount / (double)peakBin;

        previous = current;
    }

    return Tuple.Create(intensity, periodicity);
} //DetectBarsInTheRowsOfaMatrix()
/// <summary>
/// Max-pools the columns of a matrix into at most four bands with fixed column bounds
/// [8, 23), [23, 53), [53, 113) and [113, 233). Each output value is the maximum of its band.
/// (Original note: i.e. reduce the 256 vector to 4 values.)
/// </summary>
/// <param name="matrix">Source matrix; each row is pooled independently.</param>
/// <param name="reducedColCount">Number of bands to return, 0 to 4.</param>
/// <returns>A matrix with the same row count and <paramref name="reducedColCount"/> columns.</returns>
/// <exception cref="ArgumentNullException">The matrix is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">More bands are requested than are defined, or the count is negative.</exception>
/// <exception cref="ArgumentException">The matrix has too few columns to cover the requested bands.</exception>
public static double[,] MaxPoolMatrixColumns(double[,] matrix, int reducedColCount)
{
    // band edges; band c covers columns bounds[c] .. bounds[c + 1] - 1. Loop-invariant, so defined once.
    int[] bounds = { 8, 23, 53, 113, 233 };

    if (matrix == null)
    {
        throw new ArgumentNullException(nameof(matrix));
    }

    if (reducedColCount < 0 || reducedColCount > bounds.Length - 1)
    {
        throw new ArgumentOutOfRangeException(nameof(reducedColCount), $"Must be in the range 0 to {bounds.Length - 1}.");
    }

    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);
    if (rows > 0 && reducedColCount > 0 && cols < bounds[reducedColCount])
    {
        throw new ArgumentException($"Matrix requires at least {bounds[reducedColCount]} columns but has {cols}.", nameof(matrix));
    }

    var returnMatrix = new double[rows, reducedColCount];
    for (int r = 0; r < rows; r++)
    {
        for (int c = 0; c < reducedColCount; c++)
        {
            // maximum of the band, read directly from the matrix (no temporary row or sub-array)
            double max = matrix[r, bounds[c]];
            for (int i = bounds[c] + 1; i < bounds[c + 1]; i++)
            {
                if (matrix[r, i] > max)
                {
                    max = matrix[r, i];
                }
            }

            returnMatrix[r, c] = max;
        }
    }

    return returnMatrix;
}
/// <summary>
/// Max-pools the columns of a matrix by a constant factor: each output value is the maximum of
/// <paramref name="factor"/> consecutive source columns. Trailing columns that do not fill a complete
/// group are dropped (the output has cols / factor columns, using integer division).
/// </summary>
/// <param name="matrix">Source matrix; each row is pooled independently.</param>
/// <param name="factor">Number of adjacent columns pooled into one; must be positive.</param>
/// <returns>A matrix with the same row count and cols / factor columns.</returns>
/// <exception cref="ArgumentNullException">The matrix is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The factor is zero or negative.</exception>
public static double[,] MaxPoolMatrixColumnsByFactor(double[,] matrix, int factor)
{
    if (matrix == null)
    {
        throw new ArgumentNullException(nameof(matrix));
    }

    if (factor <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(factor), "Pooling factor must be positive.");
    }

    int rows = matrix.GetLength(0);
    int cols = matrix.GetLength(1);
    int reducedColCount = cols / factor;

    var returnMatrix = new double[rows, reducedColCount];
    for (int r = 0; r < rows; r++)
    {
        for (int c = 0; c < reducedColCount; c++)
        {
            // maximum of the group, read directly from the matrix (no temporary row or sub-array)
            int lowerBound = c * factor;
            double max = matrix[r, lowerBound];
            for (int i = lowerBound + 1; i < lowerBound + factor; i++)
            {
                if (matrix[r, i] > max)
                {
                    max = matrix[r, i];
                }
            }

            returnMatrix[r, c] = max;
        }
    }

    return returnMatrix;
}
/// <summary>
/// Core of the recognizer: builds a noise-reduced decibel spectrogram of the recording, finds candidate events in
/// the high-pass filtered average amplitude of the configured frequency band, scores each candidate against
/// call templates and returns the events whose score reaches the similarity threshold.
/// </summary>
/// <param name="audioRecording">The recording to analyse.</param>
/// <param name="configuration">Supplies NoiseBgThreshold, SpeciesName, AbbreviatedSpeciesName, MinHz, MaxHz and EventThreshold.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram when MainEntry.InDEBUG is set.</param>
/// <param name="segmentStartOffset">Start offset of this segment; passed to every created event.</param>
/// <returns>The accepted events, the hits matrix, the plot of event scores and the spectrogram.</returns>
internal RecognizerResults Gruntwork(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };
    config.WindowOverlap = 0.0;

    // now construct the standard decibel spectrogram WITH noise removal
    // get frame parameters for the analysis
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);

    int frameSize = config.WindowSize;
    int frameStep = frameSize; // this default = zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // reading in variables from the config file
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
    int minHz = configuration.GetInt(AnalysisKeys.MinHz);
    int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = 6;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

    // IMPORTANT: The following frame widths assume a sampling rate = 22050 and window size of 256.
    int minFrameWidth = 7;
    int maxFrameWidth = 14;

    // Calculate Max Amplitude
    int binMin = (int)Math.Round(minHz / sonogram.FBinWidth);
    int binMax = (int)Math.Round(maxHz / sonogram.FBinWidth);

    int[] dominantBins = new int[rowCount]; // dominant frequency bin of each frame
    double[] scores = new double[rowCount]; // amplitude of the dominant bin of each frame
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = double.MinValue;
        int maxId = 0;

        // loop through the bandwidth of the call and look for the dominant frequency
        for (int binID = 5; binID < binMax; binID++)
        {
            if (spectrum[binID] > maxAmplitude)
            {
                maxAmplitude = spectrum[binID];
                maxId = binID;
            }
        }

        if (maxId < binMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        scores[s] = maxAmplitude;
        dominantBins[s] = maxId;
    } // loop through all spectra

    // Find average amplitude
    double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, binMin, rowCount - 1, binMax);

    // High pass filter
    var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

    // We now have a list of potential hits. This needs to be filtered:
    // find the beginning and end of potential events in the filtered signal.
    Plot.FindStartsAndEndsOfScoreEvents(highPassFilteredSignal, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out List<Point> startEnds);

    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= binMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        if (binCount == 0)
        {
            // No frame of this event has a dominant bin inside the band. The average would be NaN and its
            // conversion to int is undefined, giving an invalid submatrix index, so the event is skipped.
            continue;
        }

        // find average dominant bin for the event
        int avDominantBin = (int)Math.Round(binSum / (double)binCount);
        int avDominantFreq = (int)(avDominantBin * sonogram.FBinWidth);

        // Get score for the event.
        // Use a simple template for the call and calculate cosine similarity to the template.
        // Template has three dominant frequencies.
        // minimum number of bins covering frequency bandwidth of the call
        int callBinWidth = 14;
        var templates = GetCtinnulaTemplates(callBinWidth);
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double eventScore = GetEventScore(eventMatrix, templates);

        // put hits into hits matrix
        // put cosine score into the score array
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        if (eventScore < similarityThreshold)
        {
            continue;
        }

        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, minHz, maxHz);
        newEvent.DominantFreq = avDominantFreq;
        newEvent.Score = eventScore;
        newEvent.SetTimeAndFreqScales(framesPerSec, sonogram.FBinWidth);
        newEvent.Name = string.Empty; // remove name because it hides spectral content of the event.

        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display a variety of debug score arrays
        DataTools.Normalise(amplitudeArray, eventThresholdDb, out var normalisedScores, out var normalisedThreshold);
        var ampltdPlot = new Plot("Average amplitude", normalisedScores, normalisedThreshold);
        DataTools.Normalise(highPassFilteredSignal, eventThresholdDb, out normalisedScores, out normalisedThreshold);
        var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { ampltdPlot, demeanedPlot };
        Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, null);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // display the cosine similarity scores
    var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // Add names into the returned events. This is done after the debug image has been drawn because the
    // names hide spectral content. The configured name is used; previously the literal text "speciesName"
    // was assigned to every event.
    foreach (AcousticEvent ae in potentialEvents)
    {
        ae.Name = speciesName; // alternatively: abbreviatedSpeciesName
    }

    return new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// New and alternative version of the L.convex recogniser, written because the call turned out to be more
/// variable than first realised.
/// A block of <c>callFrameWidth</c> consecutive frames is slid along the spectrogram; the summed spectrum of
/// each block is scored against the templates returned by <c>GetLconvexTemplates</c>. Peaks of the smoothed
/// score array that pass both the similarity threshold and the decibel threshold become acoustic events.
/// </summary>
/// <param name="audioRecording">Recording to analyse; its sample rate, WavReader and BaseName are used.</param>
/// <param name="configuration">Supplies the noise threshold, "PeakThresholdDecibels", the event threshold, "TopFrequency" and the species names.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram when <c>displayDebugImage</c> is set.</param>
/// <param name="segmentStartOffset">Segment offset passed to every created <c>AcousticEvent</c>.</param>
/// <returns>The events, the hits matrix (never written to by this method), a plot of the normalised scores and the sonogram.</returns>
/// <exception cref="InvalidOperationException">The template plus search band does not fit below the configured top frequency.</exception>
internal RecognizerResults Gruntwork2(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    // make a spectrogram
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
    int frameStep = 512;
    int sampleRate = audioRecording.SampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    var config = new SonogramConfig
    {
        // window size equals the frame step, i.e. zero overlap
        WindowSize = frameStep,
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    // NOTE: unlike Gruntwork1, the DC column is NOT removed here.
    var spg = sonogram.Data;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    double herzPerBin = sampleRate / 2.0 / colCount;
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## TWO THRESHOLDS
    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = configuration.GetDoubleOrNull("PeakThresholdDecibels") ?? 3.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.5;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    int callFrameWidth = 5;
    int callHalfWidth = callFrameWidth / 2;

    // minimum number of bins covering frequency bandwidth of L.convex call
    // call has binWidth=25 but we want a silence buffer of four bins either side.
    int callBinWidth = 25;
    int binSilenceBuffer = 4;
    int topFrequency = configuration.GetInt("TopFrequency");

    // The Limnodynastes call has a duration of 3-5 frames given the above settings,
    // but we assume 5-7 because sometimes the three harmonics are not exactly aligned.
    // The call has three major peaks. The top peak, typically the dominant one, is at approx 1850 Hz,
    // a value which is set in the config. The second and third peaks are at equal gaps below:
    // TopFreq - gap and TopFreq - (2 * gap). The gap is typically close to 500 Hz.
    //   D.Stewart CD:           peaks close to 1950, 1460 and 970 Hz (490 Hz apart).
    //   Kiyomi's JCU recording: peaks close to 1780, 1330 and 880 Hz (450 Hz apart).
    // The gap is not configurable yet; instead pre-fixed templates with different gaps are used.
    var templates = GetLconvexTemplates(callBinWidth, binSilenceBuffer);
    int templateHeight = templates[0].Length;

    // NOTE: could give user control over other call features such as the frequency gap between peaks.
    int searchBand = 8;
    int topBin = (int)Math.Round(topFrequency / herzPerBin);
    int bottomBin = topBin - templateHeight - searchBand + 1;
    if (bottomBin < 0)
    {
        // Fail fast: previously this was only logged and the negative bin index was then
        // handed to MatrixTools.Submatrix below.
        const string message = "Template bandwidth exceeds availble bandwidth given your value for top frequency.";
        Log.Fatal(message);
        throw new InvalidOperationException(message);
    }

    // restrict the spectrogram to the band [bottomBin, topBin] that is searched for the call
    spg = MatrixTools.Submatrix(spg, 0, bottomBin, sonogram.Data.GetLength(0) - 1, topBin);

    // running column sums over the first block of callFrameWidth frames
    double[,] frames = MatrixTools.Submatrix(spg, 0, 0, callFrameWidth - 1, spg.GetLength(1) - 1);
    double[] spectrum = MatrixTools.GetColumnSums(frames);

    // set up arrays for monitoring important event parameters
    double[] decibels = new double[rowCount];
    int[] bottomBins = new int[rowCount];
    double[] scores = new double[rowCount];
    int[] templateIds = new int[rowCount];
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = callFrameWidth; s < rowCount; s++)
    {
        double[] rowToRemove = MatrixTools.GetRow(spg, s - callFrameWidth);
        double[] rowToAdd = MatrixTools.GetRow(spg, s);

        // shift frame block to the right: drop the oldest frame, add the newest.
        for (int b = 0; b < spectrum.Length; b++)
        {
            spectrum[b] = spectrum[b] - rowToRemove[b] + rowToAdd[b];
        }

        // now check if frame block matches a template.
        ScanEventScores(spectrum, templates, out double eventScore, out int eventBottomBin, out int templateId);

        // results are stored at index (s - callHalfWidth - 1)
        int storeIndex = s - callHalfWidth - 1;
        decibels[storeIndex] = spectrum.Max() / callFrameWidth;
        bottomBins[storeIndex] = eventBottomBin + bottomBin; // convert back to a bin of the full spectrogram
        scores[storeIndex] = eventScore;
        templateIds[storeIndex] = templateId;
    }

    // we now have a score array, decibel array and bottom bin array for the entire spectrogram.
    // smooth them to find events
    scores = DataTools.filterMovingAverageOdd(scores, 5);
    decibels = DataTools.filterMovingAverageOdd(decibels, 3);
    var peaks = DataTools.GetPeaks(scores);

    // loop through the score array and find potential events
    var potentialEvents = new List<AcousticEvent>();
    for (int s = callHalfWidth; s < scores.Length - callHalfWidth - 1; s++)
    {
        // an event requires a score peak that passes both thresholds
        if (!peaks[s] || scores[s] < similarityThreshold || decibels[s] < eventThresholdDb)
        {
            continue;
        }

        int bottomBinForEvent = bottomBins[s];
        int topBinForEvent = bottomBinForEvent + templateHeight;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);
        double startTime = (s - callHalfWidth) * frameStepInSeconds;
        double durationTime = callFrameWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            // temporary name identifying the matched template; shown in the debug image
            // and replaced by the abbreviated species name before returning.
            Name = "Lc" + templateIds[s],
            Score = scores[s],
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var scorePlot = new Plot(this.DisplayName + " scores", scores, similarityThreshold);
    DataTools.Normalise(decibels, eventThresholdDb, out double[] normalisedDb, out double normalisedThreshold);
    var decibelPlot = new Plot("Decibels", normalisedDb, normalisedThreshold);
    var debugPlots = new List<Plot> { scorePlot, decibelPlot };

    if (this.displayDebugImage)
    {
        var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // the returned plot shows the (normalised) similarity scores
    var plot = new Plot(this.DisplayName, scores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // add names into the returned events
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;
    foreach (var ae in potentialEvents)
    {
        ae.Name = abbreviatedSpeciesName;
        ae.SpeciesName = speciesName;
    }

    return new RecognizerResults
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// First version of the L.convex recogniser.
/// For every frame the loudest bin below <c>dominantBinMax</c> is located; frames whose maximum lies inside the
/// dominant-frequency band and exceeds the peak threshold contribute to a score array. Runs of high scores are
/// turned into candidate events, each of which is then scored against the templates from
/// <c>GetLconvexTemplates</c> and kept only if it reaches the similarity threshold.
/// </summary>
/// <param name="audioRecording">Recording to analyse; its sample rate, WavReader and BaseName are used.</param>
/// <param name="configuration">Supplies the noise threshold, the event threshold, "DominantFrequency" and the species names.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram when <c>displayDebugImage</c> is set.</param>
/// <param name="segmentStartOffset">Segment offset passed to every created <c>AcousticEvent</c>.</param>
/// <returns>The events, the hits matrix, a plot of the pruned scores and the sonogram (DC column removed).</returns>
/// <exception cref="InvalidOperationException">"DominantFrequency" is missing from the configuration.</exception>
internal RecognizerResults Gruntwork1(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    // make a spectrogram
    double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;
    var config = new SonogramConfig
    {
        WindowSize = 512,
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
    };

    // now construct the standard decibel spectrogram WITH noise removal, and look for LimConvex
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;

    // get frame parameters for the analysis
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    int frameSize = colCount * 2;
    int frameStep = frameSize; // zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    double herzPerBin = sampleRate / 2.0 / colCount;
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventThresholdDb = 10.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    int minFrameWidth = 3;
    int maxFrameWidth = 5;

    // minimum number of bins covering frequency bandwidth of L.convex call
    int callBinWidth = 25;
    int silenceBinBuffer = 4;

    // The Limnodynastes call has a duration of 3-5 frames given the above settings.
    // The call has three major peaks. The dominant peak is at approx 1850 Hz, a value which is set in the config.
    // The second and third peaks are at equal gaps below: DominantFreq - gap and DominantFreq - (2 * gap).
    //   D.Stewart CD:           peaks close to 1950, 1460 and 970 Hz (490 Hz apart).
    //   Kiyomi's JCU recording: peaks close to 1780, 1330 and 880 Hz (450 Hz apart).
    // So the strategy is to look for three peaks separated by the same amount in the vicinity of the above,
    // using templates whose 2nd and 3rd peaks sit at different intervals.
    var templates = GetLconvexTemplates(callBinWidth, silenceBinBuffer);

    // A bare cast of the nullable result would fail with an unhelpful "Nullable object must have a value";
    // keep the exception type but say which setting is missing.
    var configuredDominantFrequency = configuration.GetIntOrNull("DominantFrequency");
    if (configuredDominantFrequency == null)
    {
        throw new InvalidOperationException("The configuration does not contain a value for 'DominantFrequency'.");
    }

    int dominantFrequency = (int)configuredDominantFrequency;

    // NOTE: could give user control over other call features such as the frequency gap between peaks.
    int hzBuffer = 250;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    int[] dominantBins = new int[rowCount]; // dominant bin per frame
    double[] scores = new double[rowCount]; // amplitude of the dominant bin per frame
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // look for the loudest bin from bin 5 up to (not including) dominantBinMax
        for (int binId = 5; binId < dominantBinMax; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // the maximum must lie inside the dominant band ...
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // ... and must exceed the threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        scores[s] = maxAmplitude;
        dominantBins[s] = maxId;
    }

    // We now have a list of potential hits for LimCon. This needs to be filtered.
    Plot.FindStartsAndEndsOfScoreEvents(scores, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

    // loop through the start/end pairs (X = first frame, Y = last frame) of potential events
    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin over the frames of the event
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= dominantBinMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        // No frame of the event has a dominant bin inside the band: the average below would be NaN
        // and yield a meaningless bin index, so there is nothing to score.
        if (binCount == 0)
        {
            continue;
        }

        // find average dominant bin for the event
        double meanDominantBin = Math.Round(binSum / (double)binCount);
        int avDominantBin = (int)meanDominantBin;
        int avDominantFreq = (int)(meanDominantBin * herzPerBin);

        // Get score for the event.
        // The spectrum summed over the event's frames is compared with the templates.
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double[] eventAsVector = MatrixTools.SumColumns(eventMatrix);
        GetEventScore(eventAsVector, templates, out double eventScore, out int templateId);

        // put hits into hits matrix
        // put the event score into the pruned score array
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        if (eventScore < similarityThreshold)
        {
            continue;
        }

        int topBinForEvent = avDominantBin + 2;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);
        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            // temporary name identifying the matched template; shown in the debug image
            // and replaced by the abbreviated species name before returning.
            Name = "L.c" + templateId,
            DominantFreq = avDominantFreq,
            Score = eventScore,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);
        potentialEvents.Add(newEvent);
    }

    // display the original score array
    scores = DataTools.normalise(scores);
    var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);
    var debugPlots = new List<Plot> { debugPlot };

    if (this.displayDebugImage)
    {
        var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // the returned plot shows the per-event similarity scores
    var plot = new Plot(this.DisplayName, prunedScores, similarityThreshold);
    var plots = new List<Plot> { plot };

    // add names into the returned events
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? this.SpeciesName;
    foreach (var ae in potentialEvents)
    {
        ae.Name = abbreviatedSpeciesName;
        ae.SpeciesName = speciesName;
    }

    return new RecognizerResults
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Frames whose spectral maximum (within the MinHz-MaxHz band) lies within 150 Hz of the configured dominant
/// frequency and exceeds 9 dB are marked as croak candidates. Candidates are converted to croak events, the
/// events are converted back to a score array, and that array is searched for oscillations with
/// <c>Oscillations2012.DetectOscillations</c>. The oscillation scores yield the returned events.
/// A debug spectrogram is always written to <paramref name="outputDirectory"/>.
/// </summary>
/// <param name="recording">Recording to analyse.</param>
/// <param name="configuration">Configuration read into a <c>LitoriaCaeruleaConfig</c>; also supplies the species name.</param>
/// <param name="segmentStartOffset">Start offset of this segment, copied into every event.</param>
/// <param name="getSpectralIndexes">Not used by this method.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram image.</param>
/// <param name="imageWidth">Not used by this method.</param>
/// <returns>The sonogram, a null hits matrix, the oscillation score plot and the detected events.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaCaeruleaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";

    // BETTER TO SET THESE. IGNORE USER!
    // This framesize is large because the oscillation we wish to detect is due to repeated croaks
    // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
    const int frameSize = 2048;
    const double windowOverlap = 0.5;

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // no WindowFunction is set, i.e. the default window is used.
        // if do not use noise reduction can get a more sensitive recogniser.
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
    var decibelThreshold = 9.0;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
    int rowCount = sonogram.Data.GetLength(0);

    // get the freq band as set by min and max Herz
    var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
    // Skip over spectra where maximum is not in correct location.
    int buffer = 150;
    var croakScoreArray = new double[rowCount];
    var hzAtTopOfTopBand = recognizerConfig.DominantFreq + buffer;
    var hzAtBotOfTopBand = recognizerConfig.DominantFreq - buffer;

    // bin indices are relative to the bottom of frogBand, hence the subtraction of MinHz
    var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
    var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

    // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
    for (int x = 0; x < rowCount; x++)
    {
        var spectrum = MatrixTools.GetRow(frogBand, x);
        int maxIndex = DataTools.GetMaxIndex(spectrum);
        if (spectrum[maxIndex] < decibelThreshold)
        {
            continue;
        }

        if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
        {
            croakScoreArray[x] = spectrum[maxIndex];
        }
    }

    // Prepare a normalised plot for later display with spectrogram
    DataTools.Normalise(croakScoreArray, decibelThreshold, out double[] normalisedScores, out double normalisedThreshold);
    var text1 = $"Croak scores (threshold={decibelThreshold})";
    var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

    // extract potential croak events from the array of croak candidates
    var croakEvents = AcousticEvent.ConvertScoreArray2Events(
        croakScoreArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinCroakDuration,
        recognizerConfig.MaxCroakDuration,
        segmentStartOffset);

    // add necessary info into the candidate events
    var prunedEvents = new List<AcousticEvent>();
    foreach (var ae in croakEvents)
    {
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    // With those events that survive the above Array2Events process, we now extract a new array of croak scores
    croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
    DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var text2 = $"Croak events (threshold={decibelThreshold})";
    var croakPlot2 = new Plot(text2, normalisedScores, normalisedThreshold);

    // Look for oscillations in the croak event array
    // duration of DCT in seconds
    double dctDuration = recognizerConfig.DctDuration;

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = recognizerConfig.DctThreshold;

    // NOTE(review): if MaxPeriod/MinPeriod are declared as integers these are integer divisions - confirm.
    double minOscRate = 1 / recognizerConfig.MaxPeriod;
    double maxOscRate = 1 / recognizerConfig.MinPeriod;
    var dctScores = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

    // ######################################################################
    // iii: CONVERT OSCILLATION SCORES TO EVENTS
    var events = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    // this recognizer produces no hits matrix
    double[,] hits = null;
    prunedEvents = new List<AcousticEvent>();
    foreach (var ae in events)
    {
        ae.SpeciesName = speciesName;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        prunedEvents.Add(ae);
    }

    var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

    // Debug output: written unconditionally (the original guard was a literal `true`).
    // display a variety of debug score arrays
    // calculate amplitude at location
    double[] amplitudeArray = MatrixTools.SumRows(frogBand);
    DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);
    var debugPlots = new List<Plot> { scoresPlot, croakPlot2, croakPlot1, amplPlot };

    // NOTE: This DrawDebugImage() method can be over-written in this class.
    var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
    var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
    debugImage.Save(debugPath);

    return new RecognizerResults
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = scoresPlot.AsList(),
        Events = prunedEvents,
    };
}
} //Execute()

/// <summary>
/// Reads the spectral index files of every instance and builds one feature vector per instance.
/// For each key in FEATURE_KEYS the rows of the instance's index matrix are background-subtracted, reduced with
/// <c>MaxPoolingLimited</c> and accumulated; the per-key accumulations divided by the frame count are
/// concatenated into one row of the instance feature matrix. The per-species instance counts are also written
/// to "BirdClef50_training_Counts.txt" in the output directory.
/// An index file that does not exist is reported and its feature block is left at zero.
/// </summary>
/// <param name="arguments">Supplies the instance and species counts, the species labels file, the input and output directories and the background threshold.</param>
/// <returns>File IDs, species IDs, counts, the reduced spectral length, the instance feature matrix and the feature weights.</returns>
public static Output GetInstanceRepresentations(Arguments arguments)
{
    LoggedConsole.WriteLine("1. Read in all Instances and do feature extraction");

    //################################### FEATURE WEIGHTS
    // TRY DIFFERENT WEIGHTINGS assuming following "SPT,RHZ,RVT,RPS,RNG";
    bool doDeltaFeatures = false;
    double[] weights = { 1.0, 1.0, 0.8, 0.7, 0.7 };
    double[] deltaWeights = { 1.0, 1.0, 0.8, 0.7, 0.7, 0.5, 0.4, 0.4, 0.2, 0.2 };
    if (doDeltaFeatures)
    {
        weights = deltaWeights;
    }

    // MAX-POOLING for SPECTRAL REDUCTION
    // frequency bins used to reduce dimensionality of the 256 spectral values.
    int startBin = 8;
    int maxOf2Bin = 117;
    int maxOf3Bin = 160;
    int endBin = 200;

    // run the pooling once on a dummy spectrum purely to learn the length of a reduced spectrum
    double[] testArray = new double[256];
    for (int i = 0; i < testArray.Length; i++)
    {
        testArray[i] = i;
    }

    double[] reducedArray = MaxPoolingLimited(testArray, startBin, maxOf2Bin, maxOf3Bin, endBin);
    int reducedSpectralLength = reducedArray.Length;
    LoggedConsole.WriteLine(" Reduced spectral length = " + reducedSpectralLength);

    int instanceCount = arguments.InstanceCount;
    int speciesCount = arguments.SpeciesCount;

    // READ IN THE SPECIES LABELS FILE AND SET UP THE DATA
    // both arrays are produced by the reader, so nothing is pre-allocated here
    ReadGlotinsSpeciesLabelFile(arguments.SpeciesLabelsFile, instanceCount, out string[] fileID, out int[] speciesID);

    // INIT array of species counts
    int[] instanceNumbersPerSpecies = new int[speciesCount];

    // INIT array of frame counts
    int[] frameNumbersPerInstance = new int[instanceCount];

    // initialise instance description matrix
    var keyArray = FEATURE_KEYS.Split(',');
    int totalFeatureCount = keyArray.Length * reducedArray.Length;
    LoggedConsole.WriteLine(" Total Feature Count = " + totalFeatureCount);
    if (doDeltaFeatures)
    {
        totalFeatureCount *= 2;
        LoggedConsole.WriteLine(" Total Delta Feature Count = " + totalFeatureCount);
    }

    // one matrix row per instance
    double[,] instanceFeatureMatrix = new double[instanceCount, totalFeatureCount];

    // loop through all instances
    for (int j = 0; j < instanceCount; j++)
    {
        LoggedConsole.Write(".");
        int frameCount = 0;

        // get the spectral index files
        int speciesLabel = speciesID[j];

        // dictionary to store feature spectra for instance.
        var aggreDictionary = new Dictionary<string, double[]>();

        // dictionary to store delta spectra for instance.
        var deltaDictionary = new Dictionary<string, double[]>();

        foreach (string key in keyArray)
        {
            string name = string.Format("{0}_Species{1:d2}.{2}.csv", fileID[j], speciesLabel, key);
            FileInfo file = new FileInfo(Path.Combine(arguments.InputDataDirectory.FullName, name));
            if (!file.Exists)
            {
                // Tolerated: the corresponding feature block simply stays at zero.
                LoggedConsole.WriteLine("\nWARNING: index file not found, features left at zero: " + file.FullName);
                continue;
            }

            double[,] matrix = IndexMatrices.ReadSpectrogram(file, out int binCount);

            // accumulators for the spectral values and their frame-to-frame changes
            double[] aggregateArray = new double[reducedSpectralLength];
            double[] deltaArray = new double[reducedSpectralLength];

            // NOTE(review): row 0 only seeds previousArray; it is not added to aggregateArray although
            // the later average divides by the full row count - confirm this is intended.
            double[] ipVector = MatrixTools.GetRow(matrix, 0);
            ipVector = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
            reducedArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);
            double[] previousArray = reducedArray;

            // transfer spectral values to array.
            int rowCount = matrix.GetLength(0);
            for (int r = 1; r < rowCount; r++)
            {
                ipVector = MatrixTools.GetRow(matrix, r);
                ipVector = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
                reducedArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);
                for (int c = 0; c < reducedSpectralLength; c++)
                {
                    aggregateArray[c] += reducedArray[c];

                    // DELTA value: absolute change from the previous frame
                    deltaArray[c] += Math.Abs(reducedArray[c] - previousArray[c]);
                }

                previousArray = reducedArray;
            }

            aggreDictionary[key] = aggregateArray;
            deltaDictionary[key] = deltaArray;
            frameCount = rowCount;
        }

        instanceNumbersPerSpecies[speciesLabel - 1]++;
        frameNumbersPerInstance[j] += frameCount;

        // create the matrix of instance descriptions which consists of concatenated vectors
        // j = index of instance ID = row number
        int featureID = 0;
        foreach (string key in keyArray)
        {
            int featureOffset = featureID * reducedSpectralLength;
            featureID++;

            // keys whose file was missing have no entry; indexing the dictionary directly would throw
            if (!aggreDictionary.TryGetValue(key, out double[] aggregate))
            {
                continue;
            }

            for (int c = 0; c < reducedSpectralLength; c++)
            {
                // AVERAGE (rather than sum) over the frames of the instance
                instanceFeatureMatrix[j, featureOffset + c] = aggregate[c] / frameCount;
            }
        }

        if (doDeltaFeatures)
        {
            foreach (string key in keyArray)
            {
                int featureOffset = featureID * reducedSpectralLength;
                featureID++;

                if (!deltaDictionary.TryGetValue(key, out double[] delta))
                {
                    continue;
                }

                for (int c = 0; c < reducedSpectralLength; c++)
                {
                    // AVERAGE (rather than sum) over the frames of the instance
                    instanceFeatureMatrix[j, featureOffset + c] = delta[c] / frameCount;
                }
            }
        }
    }

    LoggedConsole.WriteLine("Done!");

    LoggedConsole.WriteLine("\nSum of species number array = " + instanceNumbersPerSpecies.Sum());
    LoggedConsole.WriteLine("Sum of frame number array = " + frameNumbersPerInstance.Sum());
    bool addLineNumbers = true;
    string countsArrayOutputFilePath = Path.Combine(arguments.OutputDirectory.FullName, "BirdClef50_training_Counts.txt");
    FileTools.WriteArray2File(instanceNumbersPerSpecies, addLineNumbers, countsArrayOutputFilePath);

    // Initialise output data arrays
    Output output = new Output();
    output.FileID = fileID;
    output.SpeciesID = speciesID;
    output.InstanceNumbersPerSpecies = instanceNumbersPerSpecies;
    output.ReducedSpectralLength = reducedSpectralLength;

    // array of frame counts
    output.FrameNumbersPerInstance = frameNumbersPerInstance;

    // matrix: each row = one instance; each column = one feature
    output.InstanceFeatureMatrix = instanceFeatureMatrix;
    output.Weights = weights;

    return output;
} // GetInstanceRepresentations()
/// <summary>
/// Combines the spectral indices recovered from two false-colour spectrogram ribbons into a single matrix.
/// The ribbons are assumed to have been composed with these indices for RGB:
/// string[] colourKeys1 = { "ACI", "ENT", "EVN" };
/// string[] colourKeys2 = { "BGN", "PMN", "EVN" };
/// Each output row is the concatenation ACI | ENT | EVN | BGN | PMN, i.e. all three matrices read from
/// <paramref name="image1"/> followed by the first two matrices read from <paramref name="image2"/>.
/// </summary>
/// <param name="image1">First ribbon; cast to <c>Image&lt;Rgb24&gt;</c>.</param>
/// <param name="image2">Second ribbon; cast to <c>Image&lt;Rgb24&gt;</c>.</param>
/// <param name="startTime">Start of the wanted span; truncated to whole minutes.</param>
/// <param name="duration">Length of the wanted span; truncated to whole minutes.</param>
/// <returns>A matrix with as many rows as the first index matrix and five times as many columns.</returns>
public static double[,] ReadSpectralIndicesFromTwoFalseColourSpectrogramRibbons(Image image1, Image image2, TimeSpan startTime, TimeSpan duration)
{
    // first and last minute of the requested span
    int firstMinute = (int)startTime.TotalMinutes;
    int lastMinute = firstMinute + (int)duration.TotalMinutes;

    // recover the index matrices from each ribbon
    var ribbon1Indices = ReadSpectralIndicesFromFalseColourSpectrogram((Image<Rgb24>)image1, firstMinute, lastMinute);
    var ribbon2Indices = ReadSpectralIndicesFromFalseColourSpectrogram((Image<Rgb24>)image2, firstMinute, lastMinute);

    // the first matrix fixes the dimensions; the result holds five index blocks side by side
    const int blockCount = 5;
    int rows = ribbon1Indices[0].GetLength(0);
    int blockWidth = ribbon1Indices[0].GetLength(1);
    var combined = new double[rows, blockWidth * blockCount];

    for (int r = 0; r < rows; r++)
    {
        for (int block = 0; block < blockCount; block++)
        {
            // blocks 0-2 (ACI, ENT, EVN) come from ribbon 1, blocks 3-4 (BGN, PMN) from ribbon 2
            var source = block < 3 ? ribbon1Indices[block] : ribbon2Indices[block - 3];
            var values = MatrixTools.GetRow(source, r);
            int firstColumn = blockWidth * block;
            for (int c = 0; c < blockWidth; c++)
            {
                combined[r, firstColumn + c] = values[c];
            }
        }
    }

    return combined;
}
/// <summary>
/// Algorithm1:
/// 1: Make a noise-reduced decibel spectrogram (window 256, zero overlap) and drop its DC column.
/// 2: In each frame find the bin of maximum amplitude between bin 5 and the top of the dominant-frequency band.
///    The frame is kept only if that bin lies inside the band and its amplitude reaches the peak threshold.
/// 3: Group the per-frame amplitudes into candidate events and score each candidate against the call templates.
/// 4: Candidates whose score reaches the similarity threshold become acoustic events.
/// </summary>
/// <param name="audioRecording">Recording to analyse; its WavReader, SampleRate and BaseName are used.</param>
/// <param name="configuration">
/// Read keys: "BgNoiseThreshold" (default 0.1), "EventDecibelThreshold" (default 6.0),
/// "EventSimilarityThreshold" (default 0.2) and "DominantFrequency" (read with GetInt).
/// </param>
/// <param name="outputDirectory">Only used to save the debug spectrogram when MainEntry.InDEBUG is true.</param>
/// <param name="segmentStartOffset">Passed unchanged to every created AcousticEvent.</param>
/// <returns>The events, hits matrix, score plot and sonogram.</returns>
internal RecognizerResults Algorithm1(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
        WindowOverlap = 0.0,
    };

    // now construct the standard decibel spectrogram WITH noise removal
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;

    // get frame parameters for the analysis
    int sampleRate = audioRecording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    int frameSize = colCount * 2;
    int frameStep = frameSize; // this default = zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // Divide in floating point from the start: "sampleRate / 2" as an integer division
    // would drop half a Hertz for an odd sample rate. Same formula as Algorithm2.
    double herzPerBin = sampleRate / 2.0 / colCount;

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

    // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
    // NOTE(review): the config above sets WindowSize = 256 - confirm which window size these widths were tuned for.
    int minFrameWidth = 2;
    int maxFrameWidth = 5; // this is larger than actual to accommodate an echo.

    // minimum number of bins covering frequency bandwidth of call
    int callBinWidth = 19;

    // The PlatyplectrumOrnatum call has a duration of 3-5 frames given the above settings.
    // To this end we produce two templates.
    var templates = GetTemplatesForAlgorithm1(callBinWidth);

    int dominantFrequency = configuration.GetInt("DominantFrequency");

    // NOTE: could give user control over other call features
    // Such as frequency gap between peaks. But not in this first iteration of the recognizer.
    int hzBuffer = 100;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    // never search beyond the last bin of the spectrum, whatever DominantFrequency was configured
    int searchLimit = Math.Min(dominantBinMax, colCount);

    int[] dominantBins = new int[rowCount]; // predefinition of events max frequency
    double[] amplitudeScores = new double[rowCount]; // predefinition of amplitude score array
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
    for (int s = 0; s < rowCount; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // loop through bandwidth of call and look for dominant frequency
        for (int binId = 5; binId < searchLimit; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // the loudest bin must lie inside the dominant-frequency band
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        amplitudeScores[s] = maxAmplitude;
        dominantBins[s] = maxId;
    }

    // We now have a list of potential hits. This needs to be filtered.
    Plot.FindStartsAndEndsOfScoreEvents(amplitudeScores, eventDecibelThreshold, minFrameWidth, maxFrameWidth, out var prunedScores, out var startEnds);

    // loop through the score array and find beginning and end of potential events
    var potentialEvents = new List<AcousticEvent>();
    foreach (Point point in startEnds)
    {
        // get average of the dominant bin over the frames of this event (point.X .. point.Y inclusive)
        int binSum = 0;
        int binCount = 0;
        int eventWidth = point.Y - point.X + 1;
        for (int s = point.X; s <= point.Y; s++)
        {
            if (dominantBins[s] >= dominantBinMin)
            {
                binSum += dominantBins[s];
                binCount++;
            }
        }

        // no frame of the candidate has a dominant bin inside the band: there is nothing to average,
        // and dividing by zero would yield NaN and a meaningless bin index.
        if (binCount == 0)
        {
            continue;
        }

        // find average dominant bin for the event
        double meanDominantBin = Math.Round(binSum / (double)binCount);
        int avDominantBin = (int)meanDominantBin;
        int avDominantFreq = (int)(meanDominantBin * herzPerBin);

        // Get score for the event.
        // Use a simple template for the honk and calculate cosine similarity to the template.
        // Template has three dominant frequencies.
        var eventMatrix = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
        double eventScore = GetEventScore(eventMatrix, templates);

        // put hits into hits matrix and the event score into the score array.
        // This is done for every candidate, including those rejected just below.
        for (int s = point.X; s <= point.Y; s++)
        {
            hits[s, avDominantBin] = 10;
            prunedScores[s] = eventScore;
        }

        if (eventScore < eventSimilarityThreshold)
        {
            continue;
        }

        int topBinForEvent = avDominantBin + 2;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

        double startTime = point.X * frameStepInSeconds;
        double durationTime = eventWidth * frameStepInSeconds;
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            DominantFreq = avDominantFreq,
            Score = eventScore,

            // remove name because it hides spectral content in display of the event.
            Name = string.Empty,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);

        potentialEvents.Add(newEvent);
    }

    // plot of the per-frame event scores against the similarity threshold
    var plot = new Plot(this.DisplayName, prunedScores, eventSimilarityThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display the original decibel score array
        DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var debugPlot = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
        var debugPlots = new List<Plot> { debugPlot, plot };
        var debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(
            Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // add names into the returned events
    foreach (var ae in potentialEvents)
    {
        ae.Name = "P.o"; // abbreviatedSpeciesName;
    }

    return new RecognizerResults()
    {
        Events = potentialEvents,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// Generates a cosine test signal with harmonics at 500, 1000, 2000, 4000 and 8000 Hz,
/// converts its amplitude spectrogram to the Linear125Octaves7Tones28Nyquist32000 frequency scale,
/// and checks that one smoothed spectrum has its peaks in the expected bins.
/// It also saves the annotated spectrogram image and checks the image dimensions.
/// </summary>
public void TestFreqScaleOnArtificialSignal2()
{
    int sampleRate = 64000;
    double duration = 30; // signal duration in seconds
    int[] harmonics = { 500, 1000, 2000, 4000, 8000 };
    var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
    var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png");
    var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);

    // sonogram settings: window taken from the frequency scale, no noise reduction
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.2,
        SourceFName = "Signal2",
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // take one frame (row 40), smooth it and locate its peaks
    var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
    var isPeak = DataTools.GetPeaks(smoothedSpectrum);

    // the first and last five positions of the spectrum are ignored
    var peakIds = new List<int>();
    int lastExclusive = isPeak.Length - 5;
    for (int i = 5; i < lastExclusive; i++)
    {
        if (!isPeak[i])
        {
            continue;
        }

        int peakId = freqScale.BinBounds[i, 0];
        peakIds.Add(peakId);
        LoggedConsole.WriteLine($"Spectral peak located in bin {peakId}, Herz={freqScale.BinBounds[i, 1]}");
    }

    foreach (int h in harmonics)
    {
        LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}");
    }

    // one peak per harmonic, in ascending bin order
    int[] expectedPeakBins = { 129, 257, 513, 1025, 2049 };
    Assert.AreEqual(5, peakIds.Count);
    for (int k = 0; k < expectedPeakBins.Length; k++)
    {
        Assert.AreEqual(expectedPeakBins[k], peakIds[k]);
    }

    var image = sonogram.GetImage();
    string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={freqScale.WindowSize}";
    image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // Check that image dimensions are correct
    Assert.AreEqual(146, image.Width);
    Assert.AreEqual(310, image.Height);
}
} //DetectBarsInTheRowsOfaMatrix()

/// <summary>
/// A METHOD TO DETECT HARMONICS IN THE ROWS of the passed portion of a sonogram.
/// This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of sonogram.
/// Was first developed for crow calls.
/// First looks for a decibel profile that matches the passed call duration and decibel loudness.
/// Then cross-correlates the candidate row with the row before it to estimate the harmonic period.
/// </summary>
/// <param name="m">The (sub)matrix of spectrogram values; one row is processed per loop step.</param>
/// <param name="dBThreshold">
/// Minimum smoothed, noise-reduced row average for a row to be considered. The row must also exceed
/// the rows half a call span before and after it by at least this amount.
/// </param>
/// <param name="callSpan">Call length in rows; half of it (integer division) is used as the look-behind/look-ahead distance.</param>
/// <returns>
/// Item1: the smoothed, noise-reduced row averages. Item2: the intensity value per row. Item3: the periodicity value per row.
/// Rows that fail the filter keep intensity and periodicity of zero.
/// </returns>
public static Tuple<double[], double[], double[]> DetectHarmonicsInSonogramMatrix(double[,] m, double dBThreshold, int callSpan)
{
    int zeroBinCount = 3; //to remove low freq content which dominates the spectrum
    int halfspan = callSpan / 2;

    double[] dBArray = MatrixTools.GetRowAverages(m);
    dBArray = DataTools.filterMovingAverage(dBArray, 3);

    // noise removal: number of noise SDs to calculate noise threshold - determines severity of noise reduction
    double noiseSdCount = 0.1;
    SNR.BackgroundNoise bgn = SNR.SubtractBackgroundNoiseFromSignal(dBArray, noiseSdCount);
    dBArray = bgn.NoiseReducedSignal;

    bool[] peaks = DataTools.GetPeaks(dBArray);

    int rowCount = m.GetLength(0);
    int colCount = m.GetLength(1);
    var intensity = new double[rowCount]; //an array of period intensity
    var periodicity = new double[rowCount]; //an array of the periodicity values

    // Start no earlier than row 1 because every candidate row is compared with row r - 1.
    // (With callSpan < 2 the half span is zero and the loop would otherwise ask for row -1.)
    int firstRow = Math.Max(halfspan, 1);
    for (int r = firstRow; r < rowCount - halfspan; r++)
    {
        //APPLY A FILTER: must satisfy the following conditions for a call.
        if (!peaks[r])
        {
            continue;
        }

        if (dBArray[r] < dBThreshold)
        {
            continue;
        }

        // the row must stand out from the rows half a call span either side of it
        double lowerDiff = dBArray[r] - dBArray[r - halfspan];
        double upperDiff = dBArray[r] - dBArray[r + halfspan];
        if (lowerDiff < dBThreshold || upperDiff < dBThreshold)
        {
            continue;
        }

        double[] prevRow = DataTools.DiffFromMean(MatrixTools.GetRow(m, r - 1));
        double[] thisRow = DataTools.DiffFromMean(MatrixTools.GetRow(m, r));
        var spectrum = AutoAndCrossCorrelation.CrossCorr(prevRow, thisRow);

        for (int s = 0; s < zeroBinCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);
        intensity[r] = spectrum[maxId];

        // a maximum at index zero gives no usable period
        double period = 0.0;
        if (maxId != 0)
        {
            period = 2 * colCount / (double)maxId;
        }

        periodicity[r] = period;
    } // rows

    return Tuple.Create(dBArray, intensity, periodicity);
} //DetectHarmonicsInSonogramMatrix()
/// <summary>
/// The CORE ANALYSIS METHOD.
/// Makes a noise-reduced spectrogram of the recording, looks for harmonics in a 64-bin band
/// starting just above MIN_HZ, and converts the rows that pass the intensity, formant-gap and
/// maximum-power tests into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio file to analyse.</param>
/// <param name="configDict">
/// Must contain "MIN_HZ", "MIN_FORMANT_GAP", "MAX_FORMANT_GAP", "DECIBEL_THRESHOLD",
/// "INTENSITY_THRESHOLD" and "CALL_DURATION". AnalysisKeys.FrameLength is optional (default 1024).
/// Values are parsed with the invariant culture, i.e. "." is the decimal separator.
/// </param>
/// <param name="segmentStartOffset">Passed unchanged to every created AcousticEvent.</param>
/// <returns>The sonogram, the hits matrix, a plot of the intensity scores, the predicted events and the recording duration.</returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Config values are machine-readable text, so they must not be parsed with the culture of the
    // machine running the analysis (a comma-decimal culture would misread "0.5").
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //set default values
    int frameLength = 1024;
    if (configDict.TryGetValue(AnalysisKeys.FrameLength, out string frameLengthText))
    {
        frameLength = int.Parse(frameLengthText, invariant);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"], invariant);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"], invariant);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"], invariant);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"], invariant); //dB
    double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // with zero window overlap the frame rate (sr / window) equals the bin width in Hertz
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    //ii: DETECT HARMONICS
    var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);
    double[] intensity = results.Item2; // intensity score per frame
    double[] periodicity = results.Item3; // periodicity value per frame

    int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < harmonicIntensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        //original note: expect humans to have max < 1000 Hz; the cut-off applied below is 1200 Hz
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < noiseBound; j++)
        {
            spectrum[j] = 0.0;
        }

        int maxIndex = DataTools.GetMaxIndex(spectrum);
        int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);

        // frames whose strongest frequency is below 1200 Hz score zero
        double discount = freqWithMaxPower < 1200 ? 0.0 : 1.0;

        if (intensity[r] > harmonicIntensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        double relativePeriod = (herzPeriod - minFormantgap) / maxFormantgap;
        for (int c = minBin; c < maxbin; c++)
        {
            hits[r, c] = relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxPossibleScore = 0.5; // fixed constant used to normalise the event score
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List<AcousticEvent>();
    for (int i = 0; i < rowCount; i++)
    {
        //assume one score position per crow call
        if (scoreArray[i] < 0.001)
        {
            continue;
        }

        // the scored frame is treated as the centre of the call
        double startTime = (i - halfCallSpan) / framesPerSecond;
        AcousticEvent ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        ev.Score = scoreArray[i];
        ev.ScoreNormalised = ev.Score / maxPossibleScore;
        predictedEvents.Add(ev);
    }

    Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// Algorithm2:
/// 1: Loop through spgm and find dominant freq bin and its amplitude in each frame
/// 2: If frame passes amplitude test, then calculate a similarity cosine score for that frame. Similarity score is wrt a template matrix.
/// 3: If similarity score exceeds threshold, then assign event score based on the amplitude.
/// </summary>
/// <param name="recording">Recording to analyse; its WavReader, SampleRate and BaseName are used.</param>
/// <param name="configuration">
/// Read keys: "BgNoiseThreshold" (default 0.1), "EventDecibelThreshold" (default 6.0),
/// "EventSimilarityThreshold" (default 0.2) and "DominantFrequency" (read with GetInt).
/// </param>
/// <param name="outputDirectory">Only used to save the debug spectrogram when MainEntry.InDEBUG is true.</param>
/// <param name="segmentStartOffset">Passed unchanged to every created AcousticEvent.</param>
/// <returns>The events, hits matrix, amplitude-score plot and sonogram.</returns>
internal RecognizerResults Algorithm2(AudioRecording recording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
{
    double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

    // make a spectrogram
    var config = new SonogramConfig
    {
        WindowSize = 256,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = noiseReductionParameter,
        WindowOverlap = 0.0,
    };

    // now construct the standard decibel spectrogram WITH noise removal
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, recording.WavReader);

    // remove the DC column
    var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
    sonogram.Data = spg;

    // get frame parameters for the analysis
    int sampleRate = recording.SampleRate;
    int rowCount = spg.GetLength(0);
    int colCount = spg.GetLength(1);
    int frameSize = colCount * 2;
    int frameStep = frameSize; // this default = zero overlap
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;
    double herzPerBin = sampleRate / 2.0 / colCount;

    // ## THREE THRESHOLDS ---- only one of these is given to user.
    // minimum dB to register a dominant freq peak. After noise removal
    double peakThresholdDb = 3.0;

    // The threshold dB amplitude in the dominant freq bin required to yield an event
    double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

    // minimum score for an acceptable event - that is when processing the score array.
    double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

    // The PlatyplectrumOrnatum call has a duration of 3-5 frames GIVEN THE SAMPLING and WINDOW SETTINGS!
    // (original note: these assume a sampling rate = 22050 and window size of 512.)
    // NOTE(review): the config above sets WindowSize = 256 - confirm which settings the templates were made for.
    // Get the call templates and their dimensions
    var templates = GetTemplatesForAlgorithm2(out var callFrameDuration, out var callBinWidth);

    int dominantFrequency = configuration.GetInt("DominantFrequency");

    const int hzBuffer = 100;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    // never search beyond the last bin of the spectrum, whatever DominantFrequency was configured
    int searchTopBin = Math.Min(dominantBinMax, colCount - 1);

    // frequency band of the locality matrix that is compared with the template
    int bottomBin = 1;
    int topBin = bottomBin + callBinWidth - 1;

    int[] dominantBins = new int[rowCount]; // predefinition of events max frequency
    double[] similarityScores = new double[rowCount]; // predefinition of score array
    double[] amplitudeScores = new double[rowCount];
    double[,] hits = new double[rowCount, colCount];

    // loop through all spectra/rows of the spectrogram
    // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns= freq bins
    for (int s = 1; s < rowCount - callFrameDuration; s++)
    {
        double[] spectrum = MatrixTools.GetRow(spg, s);
        double maxAmplitude = -double.MaxValue;
        int maxId = 0;

        // loop through bandwidth of call and look for dominant frequency
        for (int binId = 8; binId <= searchTopBin; binId++)
        {
            if (spectrum[binId] > maxAmplitude)
            {
                maxAmplitude = spectrum[binId];
                maxId = binId;
            }
        }

        // the loudest bin must lie inside the dominant-frequency band
        if (maxId < dominantBinMin)
        {
            continue;
        }

        // peak should exceed threshold amplitude
        if (spectrum[maxId] < peakThresholdDb)
        {
            continue;
        }

        // now calculate similarity with template
        // s-1 because first row of template is zeros.
        var locality = MatrixTools.Submatrix(spg, s - 1, bottomBin, s + callFrameDuration - 2, topBin);
        int localMaxBin = maxId - bottomBin;

        // mean amplitude of the dominant bin over frames s, s+1 and s+2 (locality rows 1..3)
        double callAmplitude = (locality[1, localMaxBin] + locality[2, localMaxBin] + locality[3, localMaxBin]) / 3.0;

        double score = DataTools.CosineSimilarity(locality, templates[0]);
        if (score > eventSimilarityThreshold)
        {
            similarityScores[s] = score;
            dominantBins[s] = maxId;
            amplitudeScores[s] = callAmplitude;
        }
    } // loop through all spectra

    // loop through all spectra/rows of the spectrogram for a second time
    // We now have a list of potential hits. This needs to be filtered. Mark the hits in hitMatrix
    var events = new List<AcousticEvent>();
    for (int s = 1; s < rowCount - callFrameDuration; s++)
    {
        // find peaks in the array of similarity scores. First step, only look for peaks
        if (similarityScores[s] < similarityScores[s - 1] || similarityScores[s] < similarityScores[s + 1])
        {
            continue;
        }

        // require three consecutive similarity scores to be above the threshold
        if (similarityScores[s + 1] < eventSimilarityThreshold || similarityScores[s + 2] < eventSimilarityThreshold)
        {
            continue;
        }

        // now check the amplitude
        if (amplitudeScores[s] < eventDecibelThreshold)
        {
            continue;
        }

        // have an event
        // find average dominant bin over the three consecutive frames that make up the event.
        // (The earlier code summed dominantBins[s] three times, which is not an average at all.)
        // With a positive similarity threshold, frames s+1 and s+2 passed the test above,
        // so their dominant bins were recorded in the first pass.
        int avDominantBin = (dominantBins[s] + dominantBins[s + 1] + dominantBins[s + 2]) / 3;
        int avDominantFreq = (int)Math.Round(avDominantBin * herzPerBin);
        int topBinForEvent = avDominantBin + 3;
        int bottomBinForEvent = topBinForEvent - callBinWidth;
        int topFreqForEvent = (int)Math.Round(topBinForEvent * herzPerBin);
        int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

        hits[s, avDominantBin] = 10;

        double startTime = s * frameStepInSeconds;
        double durationTime = 4 * frameStepInSeconds; // events are given a fixed length of four frames
        var newEvent = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
        {
            DominantFreq = avDominantFreq,
            Score = amplitudeScores[s],

            // remove name because it hides spectral content in display of the event.
            Name = string.Empty,
        };
        newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);

        events.Add(newEvent);
    } // loop through all spectra

    // display the amplitude scores
    DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
    var plot = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
    var plots = new List<Plot> { plot };

    // DEBUG IMAGE this recognizer only. MUST set false for deployment.
    bool displayDebugImage = MainEntry.InDEBUG;
    if (displayDebugImage)
    {
        // display the similarity score array underneath the amplitude plot
        var debugPlot = new Plot("Similarity Score", similarityScores, eventSimilarityThreshold);
        var debugPlots = new List<Plot> { plot, debugPlot };
        var debugImage = DisplayDebugImage(sonogram, events, debugPlots, hits);
        var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
        debugImage.Save(debugPath.FullName);
    }

    // add names into the returned events
    foreach (var ae in events)
    {
        ae.Name = "P.o"; // abbreviatedSpeciesName;
    }

    return new RecognizerResults()
    {
        Events = events,
        Hits = hits,
        Plots = plots,
        Sonogram = sonogram,
    };
}