}//Execute

/// <summary>
/// Scores frames of a spectrogram by their similarity to a target template.
/// The target and each candidate window are converted to cepstral coefficients, flattened to a vector
/// and normalised to unit length; the score stored for a frame is the dot product of the two vectors.
/// </summary>
/// <remarks>
/// NOTE(review): the cepstral image of the target is drawn to a hard-coded absolute path on every call.
/// NOTE(review): <paramref name="minDuration"/> is not used by this method.
/// </remarks>
/// <param name="target">Template matrix; dimension 0 is frames, dimension 1 is the filter-bank bins.</param>
/// <param name="dynamicRange">Dynamic range passed to <c>SNR.SetDynamicRange</c> for the target and every window.</param>
/// <param name="sonogram">Spectrogram to search.</param>
/// <param name="segments">Candidate segments to scan; when null the method returns null.</param>
/// <param name="minHz">Lower frequency bound, converted to a bin using the sonogram's bin width.</param>
/// <param name="maxHz">Upper frequency bound, converted to a bin using the sonogram's bin width.</param>
/// <param name="minDuration">Unused.</param>
/// <returns>
/// A one-element tuple holding an array of length <c>sonogram.FrameCount</c>; index r holds the score of the
/// window starting at frame r (zero where no window was scored). Null when <paramref name="segments"/> is null.
/// </returns>
public static Tuple<double[]> Execute_MFCC_XCOR(double[,] target, double dynamicRange, SpectrogramStandard sonogram, List<AcousticEvent> segments, int minHz, int maxHz, double minDuration)
{
    Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
    if (segments == null)
    {
        return null;
    }

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);
    int targetLength = target.GetLength(0);

    // set up the matrix of cosine coefficients
    int coeffCount = 12; // only use first 12 coefficients.
    int binCount = target.GetLength(1); // number of filters in filter bank
    double[,] cosines = MFCCStuff.Cosines(binCount, coeffCount + 1);

    // adjust target's dynamic range to that set by user, then convert to a unit-length cepstral vector
    target = SNR.SetDynamicRange(target, 3.0, dynamicRange);
    target = MFCCStuff.Cepstra(target, coeffCount, cosines);
    double[] v1 = DataTools.Matrix2Array(target);
    v1 = DataTools.normalise2UnitLength(v1);

    string imagePath2 = @"C:\SensorNetworks\Output\FELT_Currawong\target.png";
    var result1 = BaseSonogram.Data2ImageData(target);
    var image = result1.Item1;
    ImageTools.DrawMatrix(image, 1, 1, imagePath2);

    double[] scores = new double[sonogram.FrameCount];

    // last frame at which a full-length window still fits inside the spectrogram
    int lastWindowStart = sonogram.FrameCount - targetLength;

    foreach (AcousticEvent av in segments)
    {
        Log.WriteLine("SEARCHING SEGMENT.");
        int startRow = Math.Max(0, (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond));
        int endRow = (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond);
        if (endRow >= sonogram.FrameCount)
        {
            endRow = sonogram.FrameCount - 1;
        }

        endRow -= targetLength;
        if (endRow <= startRow)
        {
            endRow = startRow + 1; // want minimum of one row
        }

        // Never start a window that would run past the last frame. This only bites for segments
        // that are shorter than the target AND too close to the end of the recording.
        endRow = Math.Min(endRow, lastWindowStart + 1);

        for (int r = startRow; r < endRow; r++)
        {
            double[,] matrix = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
            matrix = SNR.SetDynamicRange(matrix, 3.0, dynamicRange);
            matrix = MFCCStuff.Cepstra(matrix, coeffCount, cosines);

            double[] v2 = DataTools.Matrix2Array(matrix);
            v2 = DataTools.normalise2UnitLength(v2);
            scores[r] = DataTools.DotProduct(v1, v2);
        } // end of rows in segment
    } // foreach (AcousticEvent av in segments)

    return Tuple.Create(scores);
}//Execute
/// <summary>
/// Appends <paramref name="value"/> to <paramref name="stringBuilder"/> as an SQL string by delegating to
/// <c>DataTools.ConvertStringToSql</c> with "||" as the second argument and <c>AppendConversionAction</c>
/// as the conversion callback.
/// </summary>
/// <remarks>NOTE(review): "||" is presumably the provider's string concatenation operator - confirm.</remarks>
/// <param name="stringBuilder">Builder that receives the converted text.</param>
/// <param name="value">String value to convert.</param>
static void ConvertStringToSql(StringBuilder stringBuilder, string value)
    => DataTools.ConvertStringToSql(stringBuilder, "||", null, AppendConversionAction, value, null);
/// <summary>
/// Calculate summary statistics for supplied temporal and spectral targets.
/// </summary>
/// <remarks>
/// The acoustic statistics calculated in this method are based on methods outlined in
/// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
/// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
/// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
/// ..
/// The Briggs features are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
/// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count.
///    We use entropy instead because its value is normalized and so does not depend on the row or column count.
///    For the following measures of k-central moments, the freq and time values are normalized in 0,1 to width of the event.
/// 2. freq-mean
/// 3. freq-variance
/// 4. freq-skew and kurtosis
/// 5. time-mean
/// 6. time-variance
/// 7. time-skew and kurtosis
/// 8. freq-max (normalized)
/// 9. time-max (normalized)
/// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
/// ...
/// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels
///         and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
/// NOTE 2: To normalise for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
///         There are advantages and disadvantages to Briggs method versus ours. In our case, we have to convert decibel values back to
///         energy values when calculating the statistics for the extracted acoustic event.
/// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
///         Only mean and standard deviation.
/// ..
/// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
///         AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
///         BUT not so long as to cause memory constipation.
/// NOTE 5: Failures are reported through <c>Error</c> / <c>ErrorMessage</c> on the returned object: when the target
///         does not lie inside the recording, or when it does not cover at least one whole spectrogram frame.
/// </remarks>
/// <param name="recording">as type AudioRecording which contains the event.</param>
/// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording.</param>
/// <param name="spectralTarget">both bottom and top bounds in Hertz.</param>
/// <param name="config">parameters that determine the outcome of the analysis.</param>
/// <param name="segmentStartOffset">How long since the start of the recording this event occurred.</param>
/// <returns>an instance of EventStatistics.</returns>
public static EventStatistics AnalyzeAudioEvent(
    AudioRecording recording,
    Interval<TimeSpan> temporalTarget,
    Interval<double> spectralTarget,
    EventStatisticsConfiguration config,
    TimeSpan segmentStartOffset)
{
    var stats = new EventStatistics
    {
        EventStartSeconds = temporalTarget.Minimum.TotalSeconds,
        EventEndSeconds = temporalTarget.Maximum.TotalSeconds,
        LowFrequencyHertz = spectralTarget.Minimum,
        HighFrequencyHertz = spectralTarget.Maximum,
        SegmentDurationSeconds = recording.Duration.TotalSeconds,
        SegmentStartSeconds = segmentStartOffset.TotalSeconds,
    };

    // temporal target is supplied relative to recording, but not the supplied audio segment
    // shift coordinates relative to segment
    var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

    if (!recording
        .Duration
        .AsIntervalFromZero(Topology.Inclusive)
        .Contains(localTemporalTarget))
    {
        stats.Error = true;
        stats.ErrorMessage =
            $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";

        return stats;
    }

    // convert recording to spectrogram
    int sampleRate = recording.SampleRate;
    double epsilon = recording.Epsilon;

    // extract the spectrogram
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

    double hertzBinWidth = dspOutput1.FreqBinWidth;
    var stepDurationInSeconds = config.FrameStep / (double)sampleRate;
    var startFrame = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

    // subtract 1 frame because want to end before start of end point.
    var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

    var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
    var topBin = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

    // Events can have their high value set to the nyquist.
    // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in
    // these cases. So we just ask for the bin below.
    if (topBin >= config.FrameSize / 2)
    {
        topBin = (config.FrameSize / 2) - 1;
    }

    // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // The frame indices above are derived from time alone. The spectrogram can hold fewer frames than
    // duration / step (the last frames need a whole window of samples), so clamp the inclusive end frame.
    int lastFrame = decibelSpectrogram.GetLength(0) - 1;
    if (endFrame > lastFrame)
    {
        endFrame = lastFrame;
    }

    // Very short events (or events starting after the last whole frame) cover no frame at all.
    if (endFrame < startFrame)
    {
        stats.Error = true;
        stats.ErrorMessage =
            $"Target ({localTemporalTarget}) does not cover a whole spectrogram frame (start frame {startFrame}, end frame {endFrame})";

        return stats;
    }

    // extract the required acoustic event
    var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

    // Get the SNR of the event. This is just the max value in the matrix because noise reduced
    MatrixTools.MinMax(eventMatrix, out _, out double max);
    stats.SnrDecibels = max;

    // Now need to convert event matrix back to energy values before calculating other statistics
    eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

    var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
    var rowAverages = MatrixTools.GetRowAverages(eventMatrix);

    // calculate the mean and temporal standard deviation in decibels
    NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
    stats.MeanDecibels = 10 * Math.Log10(mean);
    stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate the frequency standard deviation in decibels
    NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
    stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

    // calculate relative location of the temporal maximum
    int maxRowId = DataTools.GetMaxIndex(rowAverages);
    stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

    // calculate the entropy dispersion/concentration indices
    stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
    stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

    // calculate the spectral centroid and the dominant frequency
    double binCentroid = CalculateSpectralCentroid(columnAverages);
    stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;

    int maxColumnId = DataTools.GetMaxIndex(columnAverages);
    stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

    return stats;
}
/// <summary>
/// Implements the "Adaptive Level Equalisation" algorithm of Lamel et al, 1981 - with modifications for our signals.
/// Units are assumed to be decibels.
/// Returns the min and max frame dB AND the estimated MODAL or BACKGROUND noise for the signal array.
/// IF this modal noise is subtracted from each frame dB, the effect is to set the average background noise level = 0 dB.
/// USED TO SEGMENT A RECORDING INTO SILENCE AND VOCALISATION.
/// </summary>
/// <remarks>
/// The noise threshold is a fixed 10 dB above the minimum, split into 100 histogram bins; it is not a parameter.
/// The original Lamel algorithm ONLY SEARCHES in the range min to 10 dB above min.
/// The first and last 20 values are ignored when finding min and max, except for arrays shorter than 1000 values.
/// The histogram itself is built from the whole array.
///
/// This method debugged on 7 Aug 2018 using following command line arguments:
/// audio2csv Y:\TheNatureConservency\Myanmar\20180517\site112\2018_02_14_Bar5\20180214_Bar5\20180214_101121_Bar5.wav Towsey.Acoustic.yml C:\Temp... -m True
/// </remarks>
/// <param name="dBarray">signal in decibel values.</param>
/// <param name="minDb">minimum value found, raised to <c>SNR.MinimumDbBoundForEnvironmentalNoise</c> when lower.</param>
/// <param name="maxDb">maximum value found in the scanned part of the array.</param>
/// <param name="modeNoise">modal or background noise in decibels.</param>
/// <param name="sdNoise">estimated sd of the noise - assuming noise to be gaussian.</param>
public static void CalculateNoiseUsingLamelsAlgorithm(
    double[] dBarray,
    out double minDb,
    out double maxDb,
    out double modeNoise,
    out double sdNoise)
{
    // set constants
    double noiseThreshold_DB = 10.0; // dB
    var binCount = 100; // number of bins for histogram is FIXED
    double histogramBinWidth = noiseThreshold_DB / binCount;

    // ignore first N and last N frames when calculating background noise level because
    // sometimes these frames have atypically low signal values
    int buffer = 20;

    // HOWEVER do not ignore them for short recordings!
    int arrayLength = dBarray.Length;
    if (arrayLength < 1000)
    {
        buffer = 0; // ie recording is < approx 11 seconds long
    }

    double min = double.MaxValue;
    double max = -double.MaxValue;
    for (int i = buffer; i < arrayLength - buffer; i++)
    {
        // Two independent tests: a value that sets a new minimum may also be the maximum so far
        // (e.g. the very first value). With an else-if the maximum is missed for such values.
        if (dBarray[i] < min)
        {
            min = dBarray[i];
        }

        if (dBarray[i] > max)
        {
            max = dBarray[i];
        }
    }

    if (min <= SNR.MinimumDbBoundForEnvironmentalNoise)
    {
        min = SNR.MinimumDbBoundForEnvironmentalNoise;
    }

    // return the outs!
    minDb = min;
    maxDb = max;

    // histogram of all values lying within the noise threshold above the minimum
    var histo = new int[binCount];
    var absThreshold = minDb + noiseThreshold_DB;

    for (var i = 0; i < arrayLength; i++)
    {
        if (dBarray[i] <= absThreshold)
        {
            var id = (int)((dBarray[i] - minDb) / histogramBinWidth);
            if (id >= binCount)
            {
                id = binCount - 1;
            }
            else if (id < 0)
            {
                id = 0;
            }

            histo[id]++;
        }
    }

    var smoothHisto = DataTools.filterMovingAverage(histo, 3);

    // find peak of lowBins histogram
    SNR.GetModeAndOneStandardDeviation(smoothHisto, out var indexOfMode, out var indexOfOneSd);

    // return remaining outs!
    modeNoise = min + ((indexOfMode + 1) * histogramBinWidth); // modal noise level
    sdNoise = (indexOfMode - indexOfOneSd) * histogramBinWidth; // SD of the noise
}
/// <summary>
/// This method rearranges the content of a false-colour spectrogram according to the acoustic cluster or acoustic state to which each minute belongs.
/// The time scale is added in afterwards - must overwrite the previous time scale and title bar.
/// This method was written to examine the cluster content of recordings analysed by Mangalam using a 10x10 SOM.
/// The output image was used in the paper presented by Michael Towsey to Ecoacoustics Congress 2016, at Michigan State University.
/// </summary>
/// <remarks>
/// All input and output paths are hard-coded. The method reports to the console and returns without
/// writing an image when the cluster file holds no line for the recording or the input image is missing.
/// </remarks>
public static void ExtractSOMClusters2()
{
    string opDir = @"C:\SensorNetworks\Output\Mangalam_EcoAcCongress2016\";
    string clusterFile = opDir + "Minute_cluster mapping - all.csv";

    //string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_EcoAcCongress2016\SERF Spectrogram SW 2010Oct14.png";
    string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_EcoAcCongress2016\SERF Spectrogram NW 2010Oct14.png";

    string fileStem = "NW_14Oct";
    //string fileStem = "SW_14Oct";
    string opFileName = fileStem + ".SOM27AcousticClusters.png";
    string title = string.Format("SOM CLUSTERS of ACOUSTIC INDICES: recording {0}", fileStem);

    int clusterCount = 27; // from Yvonne's method

    // assignment of cluster numbers to cluster LABEL
    string[] clusterLabel = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a" };

    // read the data file containing cluster sequence; use the first line that starts with the file stem
    List<string> lines = FileTools.ReadTextFile(clusterFile);
    string[] words = null;
    for (int i = 0; i < lines.Count; i++)
    {
        if (lines[i].StartsWith(fileStem, StringComparison.Ordinal))
        {
            words = lines[i].Split(',');
            break;
        }
    }

    if (words == null)
    {
        Console.WriteLine("\n\n >>>>>>>> NO CLUSTER SEQUENCE FOUND FOR >>>>>>: " + fileStem);
        return;
    }

    // init histogram to accumulate the cluster counts
    int[] clusterHistogram = new int[clusterCount];

    // init array of lists to know what minutes are assigned to what clusters.
    List<int>[] clusterArrays = new List<int>[clusterCount];
    for (int i = 0; i < clusterCount; i++)
    {
        clusterArrays[i] = new List<int>();
    }

    // construct cluster histogram and arrays. words[0] is the file stem; cluster ids are 1-based.
    for (int w = 1; w < words.Length; w++)
    {
        int clusterID = int.Parse(words[w]);
        clusterHistogram[clusterID - 1]++;
        clusterArrays[clusterID - 1].Add(w);
    }

    // ranks cluster counts in descending order
    Tuple<int[], int[]> tuple = DataTools.SortArray(clusterHistogram);
    int[] sortOrder = tuple.Item1;

    // read in the image
    FileInfo fi = new FileInfo(inputImagePath);
    if (!fi.Exists)
    {
        Console.WriteLine("\n\n >>>>>>>> FILE DOES NOT EXIST >>>>>>: " + fi.Name);
        return;
    }

    Console.WriteLine("Reading file: " + fi.Name);

    // every GDI object below is disposed deterministically
    using (Bitmap ipImage = ImageTools.ReadImage2Bitmap(fi.FullName))
    using (Pen whitePen = new Pen(Color.White))
    using (Font stringFont = new Font("Arial", 12, FontStyle.Bold))
    {
        int imageWidth = ipImage.Width;
        int imageHt = ipImage.Height;

        // init the output image; two extra columns per cluster hold the separators
        int opImageWidth = imageWidth + (2 * clusterCount);
        using (Image opImage = new Bitmap(opImageWidth, imageHt))
        using (Graphics gr = Graphics.FromImage(opImage))
        {
            gr.Clear(Color.Black);

            // copy the minute columns into the output image, cluster by cluster in rank order
            int opColumnNumber = 0;
            for (int id = 0; id < clusterCount; id++)
            {
                int sortID = sortOrder[id];

                Console.WriteLine("Reading CLUSTER: " + (sortID + 1) + "  Label=" + clusterLabel[sortID]);
                int[] minutesArray = clusterArrays[sortID].ToArray();
                int clusterStartColumn = opColumnNumber;

                // read through the entire list of minutes
                for (int m = 0; m < minutesArray.Length; m++)
                {
                    // get image column
                    Rectangle rectangle = new Rectangle(minutesArray[m] - 1, 0, 1, imageHt);
                    using (Bitmap column = ipImage.Clone(rectangle, ipImage.PixelFormat))
                    {
                        gr.DrawImage(column, opColumnNumber, 0);
                    }

                    opColumnNumber++;
                }

                // draw in separators
                gr.DrawLine(whitePen, opColumnNumber, 0, opColumnNumber, imageHt - 1);
                opColumnNumber++;
                gr.DrawLine(whitePen, opColumnNumber, 0, opColumnNumber, imageHt - 1);
                opColumnNumber++;

                // draw Cluster ID at bottom of the image
                if (minutesArray.Length > 3)
                {
                    using (Bitmap clusterIDImage = new Bitmap(minutesArray.Length, SpectrogramConstants.HEIGHT_OF_TITLE_BAR - 6))
                    {
                        using (Graphics g2 = Graphics.FromImage(clusterIDImage))
                        {
                            g2.Clear(Color.Black);
                        }

                        gr.DrawImage(clusterIDImage, clusterStartColumn, imageHt - 19);
                    }

                    int location = opColumnNumber - ((opColumnNumber - clusterStartColumn) / 2);
                    gr.DrawString(clusterLabel[sortID], stringFont, Brushes.White, new PointF(location - 10, imageHt - 19));
                }
            }

            // Draw the title bar
            using (Image titleBar = DrawTitleBarOfClusterSpectrogram(title, opImageWidth - 2))
            {
                gr.DrawImage(titleBar, 1, 0);
            }

            opImage.Save(Path.Combine(opDir, opFileName));
        }
    }
}
/// <summary>
/// Scores a sub-band spectrum using the sharpness of its highest peaks and the relative heights of
/// peaks near six fixed bin positions.
/// </summary>
/// <remarks>
/// The two template spectra (46 values each) and the six peak centres are the ones used for a window size of 1024.
/// NOTE(review): the template correlation (TEST TWO) is evaluated but does not contribute to any returned score.
/// NOTE(review): in TEST THREE a peak outside the allowed band is skipped without being cleared, so the
/// following passes presumably find the same peak again - confirm whether that is intended.
/// NOTE(review): indices are not range-checked; the spectrum is assumed long enough for every offset used here.
/// </remarks>
/// <param name="subBandSpectrum">Spectrum values of the sub-band to score.</param>
/// <param name="windowWidth">Window size; 1024 and 2048 select different neighbourhood widths.</param>
/// <returns>
/// Three values: [0] combined score (0.4 * relative peak score + 0.6 * peaks score),
/// [1] relative peak score, [2] peaks score.
/// </returns>
public static double[] CalculateScores(double[] subBandSpectrum, int windowWidth)
{
    // TEST TWO: correlation with template spectra (window size 1024)
    double[] truePositives1 =
    {
        0.0007, 0.0004, 0.0000, 0.0025, 0.0059, 0.0069, 0.0044, 0.0012, 0.0001, 0.0006, 0.0013, 0.0032,
        0.0063, 0.0067, 0.0070, 0.0033, 0.0086, 0.0128, 0.1546, 0.4550, 0.6197, 0.4904, 0.2075, 0.0714,
        0.1171, 0.4654, 0.8634, 1.0000, 0.7099, 0.2960, 0.1335, 0.3526, 0.6966, 0.9215, 0.6628, 0.3047,
        0.0543, 0.0602, 0.0931, 0.1364, 0.1314, 0.1047, 0.0605, 0.0204, 0.0128, 0.0114,
    };
    double[] truePositives2 =
    {
        0.0126, 0.0087, 0.0043, 0.0002, 0.0000, 0.0010, 0.0018, 0.0016, 0.0005, 0.0002, 0.0050, 0.1262,
        0.4054, 0.5111, 0.3937, 0.1196, 0.0156, 0.0136, 0.0840, 0.1598, 0.1691, 0.0967, 0.0171, 0.0152,
        0.0234, 0.3648, 0.8243, 1.0000, 0.6727, 0.2155, 0.0336, 0.0240, 0.2661, 0.6240, 0.7523, 0.5098,
        0.1493, 0.0149, 0.0046, 0.0020, 0.0037, 0.0061, 0.0061, 0.0036, 0.0010, 0.0008,
    };

    double[] spectrumZScores = NormalDist.Convert2ZScores(subBandSpectrum);
    double score1 = AutoAndCrossCorrelation.CorrelationCoefficient(spectrumZScores, truePositives1);
    double score2 = AutoAndCrossCorrelation.CorrelationCoefficient(spectrumZScores, truePositives2);

    // kept for parity with the earlier scoring scheme; not part of the returned scores
    double correlationScore = score2 > score1 ? score2 : score1;

    // TEST THREE: sharpness and height of peaks - up to four passes over a working copy
    bool isWindow2048 = windowWidth == 2048;
    int sideOffset = isWindow2048 ? 6 : 3;   // distance from peak to the bins taken as its sides
    int clearWidth = isWindow2048 ? 9 : 3;   // neighbourhood zeroed once a peak has been scored

    double[] workingSpectrum = (double[])subBandSpectrum.Clone();
    int lowerBound = subBandSpectrum.Length / 4;
    int upperBound = subBandSpectrum.Length * 7 / 8;

    double peaksScore = 0;
    for (int pass = 0; pass < 4; pass++)
    {
        int peakLocation = DataTools.GetMaxIndex(workingSpectrum);
        if (peakLocation < lowerBound || peakLocation > upperBound)
        {
            continue; // peak location cannot be too low or too high
        }

        double peakHeight = workingSpectrum[peakLocation];
        double leftSide = subBandSpectrum[peakLocation - sideOffset];
        double rightSide = subBandSpectrum[peakLocation + sideOffset];
        peaksScore += peakHeight - ((leftSide + rightSide) / 2);

        // now zero peak and peak neighbourhood
        for (int n = 0; n < clearWidth; n++)
        {
            workingSpectrum[peakLocation + n] = 0;
            workingSpectrum[peakLocation - n] = 0;
        }
    }

    // take average of four peaks
    peaksScore /= 4;

    // TEST FOUR: peak position ratios
    int[] peakLocationCentres = { 2, 5, 19, 22, 27, 33 };
    int nh2 = windowWidth == 1024 ? 3 : 6;

    double[] relativePeakHeights = new double[6];
    for (int p = 0; p < 6; p++)
    {
        int centre = peakLocationCentres[p];

        // locate the largest value in the bins [centre - 4, centre + 4), not going below bin 0
        double max = -double.MaxValue;
        int maxId = centre;
        for (int id = Math.Max(0, centre - 4); id < centre + 4; id++)
        {
            if (subBandSpectrum[id] > max)
            {
                max = subBandSpectrum[id];
                maxId = id;
            }
        }

        int lowerPosition = Math.Max(0, maxId - nh2);
        relativePeakHeights[p] = subBandSpectrum[maxId] - subBandSpectrum[lowerPosition] - subBandSpectrum[maxId + nh2];
    }

    double[] targetHeights = { 0.1, 0.1, 0.5, 0.5, 1.0, 0.6 };
    double[] observedZScores = NormalDist.Convert2ZScores(relativePeakHeights);
    double[] targetZScores = NormalDist.Convert2ZScores(targetHeights);
    double relativePeakScore = AutoAndCrossCorrelation.CorrelationCoefficient(observedZScores, targetZScores);

    // PROCESS SCORES
    double score = (relativePeakScore * 0.4) + (peaksScore * 0.6);
    return new[] { score, relativePeakScore, peaksScore };
}
/// <summary>
/// Generates a 30 second cosine test signal with five harmonics at a 64 kHz sample rate, converts its amplitude
/// spectrogram to the octave frequency scale and checks the location of the spectral peaks in one frame
/// and the dimensions of the annotated image, which is also saved to the output directory.
/// </summary>
public void TestFreqScaleOnArtificialSignal2()
{
    int sampleRate = 64000;
    double duration = 30; // signal duration in seconds
    int[] harmonics = { 500, 1000, 2000, 4000, 8000 };
    int[] expectedPeakBins = { 129, 257, 513, 1025, 2049 };

    var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
    var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png");
    var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine);

    // init the default sonogram config
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.WindowSize,
        WindowOverlap = 0.2,
        SourceFName = "Signal2",
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

    // pick a row, any row; smooth it and find its peaks
    var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
    var peaks = DataTools.GetPeaks(smoothedSpectrum);

    // collect the bin ids of the peaks, ignoring five positions at each end of the spectrum
    var peakIds = new List<int>();
    for (int i = 5; i < peaks.Length - 5; i++)
    {
        if (!peaks[i])
        {
            continue;
        }

        int peakId = freqScale.BinBounds[i, 0];
        peakIds.Add(peakId);
        LoggedConsole.WriteLine($"Spectral peak located in bin {peakId},  Herz={freqScale.BinBounds[i, 1]}");
    }

    foreach (int h in harmonics)
    {
        LoggedConsole.WriteLine($"Harmonic {h}Herz  should be in bin  {freqScale.GetBinIdForHerzValue(h)}");
    }

    Assert.AreEqual(5, peakIds.Count);
    for (int k = 0; k < expectedPeakBins.Length; k++)
    {
        Assert.AreEqual(expectedPeakBins[k], peakIds[k]);
    }

    string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)}   SR={sampleRate} Window={freqScale.WindowSize}";
    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), title, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // Check that image dimensions are correct
    Assert.AreEqual(146, image.Width);
    Assert.AreEqual(310, image.Height);
}
/// <summary>
/// Renders a spectrogram matrix as a grey-scale image, low frequencies at the bottom, optionally
/// tinting a frequency sub-band green.
/// </summary>
/// <remarks>
/// Values are normalised between the data minimum and maximum after both are pulled in by 1% of the range
/// (contrast stretching); larger values are drawn darker.
/// NOTE(review): assumes <paramref name="maxFrequency"/> does not exceed <paramref name="nyquistFreq"/>;
/// nothing here limits the top bin to the number of bins in <paramref name="data"/>.
/// </remarks>
/// <param name="data">Spectrogram values; dimension 0 is the spectrum (frame) index, dimension 1 the frequency bin.</param>
/// <param name="nyquistFreq">Nyquist frequency in Hertz.</param>
/// <param name="maxFrequency">Highest frequency to draw, in Hertz.</param>
/// <param name="doMelScale">When true the highlight bounds are derived on the mel scale.</param>
/// <param name="binHeight">Number of image rows drawn for each frequency bin.</param>
/// <param name="doHighlightSubband">When true, pixels for which <c>IsInBand</c> holds get the green tinge.</param>
/// <param name="subBandMinHz">Lower bound of the highlighted sub-band in Hertz.</param>
/// <param name="subBandMaxHz">Upper bound of the highlighted sub-band in Hertz.</param>
/// <returns>An image whose width is the number of spectra and whose height is (top bin * binHeight).</returns>
public static Image<Rgb24> GetSonogramImage(double[,] data, int nyquistFreq, int maxFrequency, bool doMelScale, int binHeight, bool doHighlightSubband, int subBandMinHz, int subBandMaxHz)
{
    int spectrumCount = data.GetLength(0); // Number of spectra in sonogram
    int fftBins = data.GetLength(1);
    int maxBin = (int)Math.Floor(fftBins * maxFrequency / (double)nyquistFreq);
    int imageHeight = maxBin * binHeight; // image ht = sonogram ht. Later include grid and score scales

    // set up min and range for normalising of dB values, with 1% contrast stretching at both ends
    DataTools.MinMax(data, out double min, out double max);
    double rawRange = max - min;
    double fractionalStretching = 0.01;
    min = min + (rawRange * fractionalStretching);
    max = max - (rawRange * fractionalStretching);
    double range = max - min;

    // calculate top and bottom of sub-band
    int minHighlightBin;
    int maxHighlightBin;
    if (doMelScale)
    {
        double maxMel = MFCCStuff.Mel(nyquistFreq);
        int melRange = (int)(maxMel + 1);
        double pixelPerMel = imageHeight / (double)melRange;
        minHighlightBin = (int)Math.Round(MFCCStuff.Mel(subBandMinHz) * pixelPerMel);
        maxHighlightBin = (int)Math.Round(MFCCStuff.Mel(subBandMaxHz) * pixelPerMel);
    }
    else
    {
        minHighlightBin = (int)Math.Round(subBandMinHz / (double)nyquistFreq * fftBins);
        maxHighlightBin = (int)Math.Round(subBandMaxHz / (double)nyquistFreq * fftBins);
    }

    Color[] grayScale = ImageTools.GrayScale();
    var bmp = new Image<Rgb24>(spectrumCount, imageHeight);

    // for all freq bins
    for (int bin = 0; bin < maxBin; bin++)
    {
        // repeat this bin binHeight times
        for (int repeat = 0; repeat < binHeight; repeat++)
        {
            // rows are filled from the bottom of the image upwards
            int imageRow = imageHeight - 1 - ((bin * binHeight) + repeat);

            // for all pixels in line
            for (int x = 0; x < spectrumCount; x++)
            {
                // normalise and bound the value to the 0-255 intensity range
                double value = (data[x, bin] - min) / range;
                int c = 255 - (int)Math.Floor(255.0 * value);
                c = Math.Max(0, Math.Min(255, c));

                Color pixelColour;
                if (doHighlightSubband && IsInBand(bin, minHighlightBin, maxHighlightBin))
                {
                    // green tinge used in the template scan band
                    int g = Math.Min(255, c + 40);
                    pixelColour = Color.FromRgb((byte)c, (byte)g, (byte)c);
                }
                else
                {
                    pixelColour = grayScale[c];
                }

                bmp[x, imageRow] = pixelColour;
            }
        } // end repeats over one track
    }

    return bmp;
}
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Frames whose spectral maximum lies within 150 Hz of the configured dominant frequency are scored as
/// croak candidates; the candidate score array is then searched for oscillations and the resulting
/// oscillation scores are converted to events.
/// </summary>
/// <param name="recording">The audio segment to analyse.</param>
/// <param name="configuration">Recognizer configuration; read into a <c>LitoriaCaeruleaConfig</c> and used for the species name.</param>
/// <param name="segmentStartOffset">Start offset of this segment; passed to event conversion and stored on each event.</param>
/// <param name="getSpectralIndexes">Not used by this recognizer.</param>
/// <param name="outputDirectory">Directory in which the debug spectrogram image is saved.</param>
/// <param name="imageWidth">Not used by this recognizer.</param>
/// <returns>The sonogram, the oscillation score plot and the detected events. <c>Hits</c> is always null.</returns>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaCaeruleaConfig();
    recognizerConfig.ReadConfigFile(configuration);

    // common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";

    // BETTER TO SET THESE. IGNORE USER!
    // This framesize is large because the oscillation we wish to detect is due to repeated croaks
    // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
    const int frameSize = 2048;
    const double windowOverlap = 0.5;
    const double decibelThreshold = 9.0;

    // i: MAKE SONOGRAM
    // The default (HAMMING) window is used. Skipping noise reduction would give a more sensitive recogniser.
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = 0.0,
    };

    TimeSpan recordingDuration = recording.WavReader.Time;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));
    int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
    int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // Adds the species/segment metadata to every candidate event and collects them in a new list.
    List<AcousticEvent> AnnotateEvents(IEnumerable<AcousticEvent> candidates)
    {
        var annotated = new List<AcousticEvent>();
        foreach (var ae in candidates)
        {
            ae.SpeciesName = speciesName;
            ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
            ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
            ae.Name = recognizerConfig.AbbreviatedSpeciesName;
            annotated.Add(ae);
        }

        return annotated;
    }

    // ######################################################################
    // ii: DO THE ANALYSIS AND RECOVER SCORES
    int rowCount = sonogram.Data.GetLength(0);

    // get the freq band as set by min and max Hertz
    var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

    // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
    // Skip over spectra where the maximum is not in the correct location.
    int buffer = 150;
    var croakScoreArray = new double[rowCount];
    var hzAtTopOfTopBand = recognizerConfig.DominantFreq + buffer;
    var hzAtBotOfTopBand = recognizerConfig.DominantFreq - buffer;
    var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
    var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

    // Scan the frog band and keep the decibel value of those spectra whose maximum is loud enough
    // and falls strictly inside the expected sub-band. All other frames keep a score of zero.
    for (int x = 0; x < rowCount; x++)
    {
        var spectrum = MatrixTools.GetRow(frogBand, x);
        int maxIndex = DataTools.GetMaxIndex(spectrum);
        if (spectrum[maxIndex] < decibelThreshold)
        {
            continue;
        }

        if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
        {
            croakScoreArray[x] = spectrum[maxIndex];
        }
    }

    // Prepare a normalised plot for later display with the spectrogram
    DataTools.Normalise(croakScoreArray, decibelThreshold, out double[] normalisedScores, out double normalisedThreshold);
    var croakPlot1 = new Plot($"Croak scores (threshold={decibelThreshold})", normalisedScores, normalisedThreshold);

    // extract potential croak events from the array of croak candidates
    var croakEvents = AcousticEvent.ConvertScoreArray2Events(
        croakScoreArray,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinCroakDuration,
        recognizerConfig.MaxCroakDuration,
        segmentStartOffset);

    // add necessary info into the candidate events
    var prunedEvents = AnnotateEvents(croakEvents);

    // With those events that survive the above Array2Events process, we now extract a new array of croak scores
    croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
    DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var croakPlot2 = new Plot($"Croak events (threshold={decibelThreshold})", normalisedScores, normalisedThreshold);

    // Look for oscillations in the croak score array.
    // duration of DCT in seconds
    double dctDuration = recognizerConfig.DctDuration;

    // minimum acceptable value of a DCT coefficient
    double dctThreshold = recognizerConfig.DctThreshold;

    // NOTE(review): if MaxPeriod/MinPeriod are integer-typed these are integer divisions - confirm they are floating point.
    double minOscRate = 1 / recognizerConfig.MaxPeriod;
    double maxOscRate = 1 / recognizerConfig.MinPeriod;
    var dctScores = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

    // ######################################################################
    // iii: CONVERT OSCILLATION SCORES TO EVENTS
    var events = AcousticEvent.ConvertScoreArray2Events(
        dctScores,
        recognizerConfig.MinHz,
        recognizerConfig.MaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        recognizerConfig.EventThreshold,
        recognizerConfig.MinDuration,
        recognizerConfig.MaxDuration,
        segmentStartOffset);

    double[,] hits = null;
    prunedEvents = AnnotateEvents(events);

    var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

    // The debug spectrogram is always produced: it displays a variety of debug score arrays.
    // calculate amplitude at location
    double[] amplitudeArray = MatrixTools.SumRows(frogBand);
    DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
    var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

    var debugPlots = new List<Plot> { scoresPlot, croakPlot2, croakPlot1, amplPlot };

    // NOTE: This DrawDebugImage() method can be over-written in this class.
    var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
    var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
    debugImage.Save(debugPath);

    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = scoresPlot.AsList(),
        Events = prunedEvents,
    };
}
/// <summary>
/// Calculates the spectral indices OSC, ACI, ENT, BGN, PMN and EVN for one recording segment.
/// Frames do not overlap (frame step equals <c>ContentSignatures.FrameSize</c>).
/// </summary>
/// <param name="recording">The audio segment from which the indices are calculated.</param>
/// <param name="segmentOffsetTimeSpan">Not used by the current implementation.</param>
/// <param name="sampleRateOfOriginalAudioFile">Sample rate of the original audio, used to detect whether the segment was up-sampled.</param>
/// <param name="returnSonogramInfo">Not used by the current implementation.</param>
/// <returns>A new <c>SpectralIndexValuesForContentDescription</c> with its OSC, ACI, ENT, BGN, PMN and EVN spectra assigned.</returns>
/// <exception cref="ArgumentNullException"><paramref name="recording"/> is null.</exception>
public static SpectralIndexValuesForContentDescription Analysis(
    AudioRecording recording,
    TimeSpan segmentOffsetTimeSpan,
    int sampleRateOfOriginalAudioFile,
    bool returnSonogramInfo = false)
{
    if (recording == null)
    {
        throw new ArgumentNullException(nameof(recording));
    }

    double epsilon = recording.Epsilon;
    int sampleRate = recording.WavReader.SampleRate;

    // Get FRAME parameters for the calculation of Acoustic Indices
    int frameSize = ContentSignatures.FrameSize;
    int frameStep = frameSize; // that is, windowOverlap = zero
    double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
    var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

    // INITIALISE a RESULTS STRUCTURE TO return
    var config = new IndexCalculateConfig(); // sets some default values

    // NOTE(review): the result of this call is not used in this method. The call is retained only
    // because its side effects (if any) cannot be seen from here - remove it if there are none.
    GetIndexProperties();

    var spectralIndices = new SpectralIndexValuesForContentDescription();

    // ################################## FINISHED SET-UP
    // (A) ################################## GET THE AMPLITUDE SPECTROGRAM
    // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
    // Original author's note: the amplitude spectrogram has had the DC bin removed, i.e. it has only 256 columns.
    var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
    var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

    // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING
    // Get the oscillation spectral index OSC separately from the signal because it needs a different frame size etc.
    var sampleLength = Oscillations2014.DefaultSampleLength;
    var frameLength = Oscillations2014.DefaultFrameLength;
    var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
    var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

    // double the length of the vector because we want to work with a 256 element vector for spectrogram purposes
    spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

    // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM
    // IFF there has been UP-SAMPLING, calculate the bin of the original audio nyquist. This will be less than SR/2.
    // The original sample rate can be anything 11.0-44.1 kHz.
    int originalNyquist = sampleRateOfOriginalAudioFile / 2;

    // if up-sampling has been done
    if (dspOutput1.NyquistFreq > originalNyquist)
    {
        dspOutput1.NyquistFreq = originalNyquist;
        dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
    }

    // i: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
    spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

    // ii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values,
    // i.e. calculate 1-Ht for energy concentration. The array is updated in place.
    double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
    for (int i = 0; i < temporalEntropySpectrum.Length; i++)
    {
        temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
    }

    spectralIndices.ENT = temporalEntropySpectrum;

    // (D) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM
    // i: Convert the amplitude spectrogram to decibels and calculate the dB background noise profile
    double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
    spectralIndices.BGN = spectralDecibelBgn;

    // ii: Calculate the noise reduced decibel spectrogram derived from the segment recording.
    // NOTE(review): this repeats the conversion done in step (i) with identical arguments. It is only
    // needed if CalculateBackgroundNoise modifies its input - confirm before removing.
    decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
    decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
    decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

    // iii: CALCULATE the noise reduced AVERAGE DECIBEL SPECTRUM
    spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

    // iv: CALCULATE SPECTRAL EVENTS. NOTE: at this point, decibelSpectrogram is noise reduced.
    // dB THRESHOLD for calculating spectral coverage
    double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

    // Calculate lower and upper boundary bin ids.
    // The boundary between the low and mid frequency bands is there to avoid low frequency bins
    // containing anthropogenic noise, which biases index values away from biophony.
    int midFreqBound = config.MidFreqBound;
    int lowFreqBound = config.LowFreqBound;
    int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
    int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
    var spActivity = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);

    // Only the event spectrum is retained; the cover spectrum of spActivity is not stored.
    spectralIndices.EVN = spActivity.EventSpectrum;

    return spectralIndices;
} // end calculation of Six Spectral Indices
/// <summary>
/// Draws a time-reduced grey-scale image of this sonogram. The frames are divided into consecutive
/// windows, one per image column, and each column shows the spectrum of the frame in its window
/// having the largest value in <c>DecibelsPerFrame</c>.
/// </summary>
/// <param name="factor">Time reduction factor. The image width is the frame count divided by this value.</param>
/// <param name="drawGridLines">When true, black pixels are drawn at the rows flagged by the linear 1000 Hz Y-axis scale.</param>
/// <returns>An image of width (frame count / <paramref name="factor"/>) and height equal to the number of frequency bins.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="factor"/> is less than 1 or greater than the number of frames.
/// </exception>
public Image<Rgb24> GetImage_ReducedSonogram(int factor, bool drawGridLines)
{
    if (factor < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(factor), factor, "The reduction factor must be at least 1.");
    }

    var data = this.Data; // sonogram intensity values
    int frameCount = data.GetLength(0); // number of spectra in the sonogram
    int imageHeight = data.GetLength(1); // image height = number of frequency bins
    int imageWidth = frameCount / factor;
    if (imageWidth == 0)
    {
        // Previously this surfaced as a DivideByZeroException on the next line.
        throw new ArgumentOutOfRangeException(nameof(factor), factor, "The reduction factor exceeds the number of frames in the sonogram.");
    }

    // Number of frames represented by one image column.
    int subSample = frameCount / imageWidth;

    // set up min, max, range for normalising of dB values
    DataTools.MinMax(data, out double min, out double max);
    double range = max - min;

    var grayScale = ImageTools.GrayScale();

    // set up the 1000 Hz scale: non-zero entries mark the location of grid lines
    int herzInterval = 1000;
    int[] vScale = FrequencyScale.CreateLinearYaxis(herzInterval, this.NyquistFrequency, imageHeight);

    var bmp = new Image<Rgb24>(imageWidth, imageHeight);
    for (int w = 0; w < imageWidth; w++)
    {
        // Frames [start, end) belong to this column. The end index is exclusive so that the last
        // frame of the window is examined too, and a window of a single frame is not skipped.
        int start = w * subSample;
        int end = (w + 1) * subSample;

        // Find the frame with maximum energy. Default to the first frame of the window.
        double maxE = -double.MaxValue;
        int maxId = start;
        for (int x = start; x < end; x++)
        {
            if (maxE < this.DecibelsPerFrame[x])
            {
                maxE = this.DecibelsPerFrame[x];
                maxId = x;
            }
        }

        // Now draw the spectrum of that frame over all freq bins, lowest bin on the bottom row.
        for (int y = 0; y < imageHeight; y++)
        {
            // Normalise the value, invert it and bound it to the 0..255 intensity range.
            double value = (data[maxId, y] - min) / range;
            int c = 255 - (int)Math.Floor(255.0 * value);
            if (c < 0)
            {
                c = 0;
            }
            else if (c >= 256)
            {
                c = 255;
            }

            bmp[w, imageHeight - y - 1] = grayScale[c];
        }

        if (drawGridLines)
        {
            var gridCol = Color.Black;

            // over all Y-axis pixels
            for (int p = 0; p < vScale.Length; p++)
            {
                if (vScale[p] == 0)
                {
                    continue;
                }

                // Skip rows that fall outside the image (p == 0 maps to row imageHeight).
                int y = imageHeight - p;
                if (y < 0 || y >= imageHeight)
                {
                    continue;
                }

                bmp[w, y] = gridCol;
            }
        }
    }

    return bmp;
}
/// <summary> /// Apply feature learning process on a set of target (1-minute) recordings (inputPath) /// according to the a set of centroids learned using feature learning process. /// Output feature vectors (outputPath). /// </summary> public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List <double[][]> allCentroids, string inputPath, string outputPath) { var simVecDir = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors")); int frameSize = config.FrameSize; int finalBinCount = config.FinalBinCount; FreqScaleType scaleType = config.FrequencyScaleType; var settings = new SpectrogramSettings() { WindowSize = frameSize, // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second // The "WindowOverlap" is calculated to answer this question // each 24 single-frames duration is equal to 1 second // note that the "WindowOverlap" value should be recalculated if frame size is changed // this has not yet been considered in the Config file! WindowOverlap = 0.10725204, DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, NoiseReductionType = NoiseReductionType.None, NoiseReductionParameter = 0.0, }; double frameStep = frameSize * (1 - settings.WindowOverlap); int minFreqBin = config.MinFreqBin; int maxFreqBin = config.MaxFreqBin; int numFreqBand = config.NumFreqBand; int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand; int patchHeight = config.PatchHeight; // the number of frames that their feature vectors will be concatenated in order to preserve temporal information. 
int frameWindowLength = config.FrameWindowLength; // the step size to make a window of frames int stepSize = config.StepSize; // the factor of downsampling int maxPoolingFactor = config.MaxPoolingFactor; // check whether there is any file in the folder/subfolders if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0) { throw new ArgumentException("The folder of recordings is empty..."); } //***** // lists of features for all processing files // the key is the file name, and the value is the features for different bands Dictionary <string, List <double[, ]> > allFilesMinFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesMeanFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesMaxFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesStdFeatureVectors = new Dictionary <string, List <double[, ]> >(); Dictionary <string, List <double[, ]> > allFilesSkewnessFeatureVectors = new Dictionary <string, List <double[, ]> >(); double[,] inputMatrix; List <AudioRecording> recordings = new List <AudioRecording>(); foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) { FileInfo fileInfo = filePath.ToFileInfo(); // process the wav file if it is not empty if (fileInfo.Length != 0) { var recording = new AudioRecording(filePath); settings.SourceFileName = recording.BaseName; if (config.DoSegmentation) { recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep); } else { recordings.Add(recording); } for (int s = 0; s < recordings.Count; s++) { string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv"); var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recordings[s].WavReader); var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram); // DO RMS 
NORMALIZATION //sonogram.Data = SNR.RmsNormalization(sonogram.Data); // DO NOISE REDUCTION if (config.DoNoiseReduction) { decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data); } // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins if (minFreqBin != 1 || maxFreqBin != finalBinCount) { inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin); } else { inputMatrix = decibelSpectrogram.Data; } // creating matrices from different freq bands of the source spectrogram List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand); double[][,] matrices2 = allSubmatrices2.ToArray(); List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>(); for (int i = 0; i < matrices2.GetLength(0); i++) { // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor); int rows = downsampledMatrix.GetLength(0); int columns = downsampledMatrix.GetLength(1); var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential); allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix()); } // +++++++++++++++++++++++++++++++++++Feature Transformation // to do the feature transformation, we normalize centroids and // sequential patches from the input spectrogram to unit length // Then, we calculate the dot product of each patch with the centroids' matrix List <double[][]> allNormCentroids = new List <double[][]>(); for (int i = 0; i < allCentroids.Count; i++) { // double check the index of the list double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++) { normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]); } 
allNormCentroids.Add(normCentroids); } List <double[][]> allFeatureTransVectors = new List <double[][]>(); // processing the sequential patch matrix for each band for (int i = 0; i < allSequentialPatchMatrix.Count; i++) { List <double[]> featureTransVectors = new List <double[]>(); double[][] similarityVectors = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][]; for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++) { // normalize each patch to unit length var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j]; var normVector = inputVector; // to avoid vectors with NaN values, only normalize those that their norm is not equal to zero. if (inputVector.Euclidean() != 0) { normVector = ART_2A.NormaliseVector(inputVector); } similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector); } Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix()); // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames // using FrameWindowLength // patchId refers to the patch id that has been processed so far according to the step size. // if we want no overlap between different frame windows, then stepSize = frameWindowLength int patchId = 0; while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0)) { List <double[]> patchGroup = new List <double[]>(); for (int k = 0; k < frameWindowLength; k++) { patchGroup.Add(similarityVectors[k + patchId]); } featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup)); patchId = patchId + stepSize; } allFeatureTransVectors.Add(featureTransVectors.ToArray()); } // +++++++++++++++++++++++++++++++++++Feature Transformation // +++++++++++++++++++++++++++++++++++Temporal Summarization // Based on the resolution to generate features, the "numFrames" parameter will be set. 
// Each 24 single-frame patches form 1 second // for each 24 patch, we generate 5 vectors of min, mean, std, and max (plus skewness from Accord.net) // The pre-assumption is that each input recording is 1 minute long // store features of different bands in lists List <double[, ]> allMinFeatureVectors = new List <double[, ]>(); List <double[, ]> allMeanFeatureVectors = new List <double[, ]>(); List <double[, ]> allMaxFeatureVectors = new List <double[, ]>(); List <double[, ]> allStdFeatureVectors = new List <double[, ]>(); List <double[, ]> allSkewnessFeatureVectors = new List <double[, ]>(); // Each 24 frames form 1 second using WindowOverlap // factors such as stepSize, and maxPoolingFactor should be considered in temporal summarization. int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor); foreach (var freqBandFeature in allFeatureTransVectors) { List <double[]> minFeatureVectors = new List <double[]>(); List <double[]> meanFeatureVectors = new List <double[]>(); List <double[]> maxFeatureVectors = new List <double[]>(); List <double[]> stdFeatureVectors = new List <double[]>(); List <double[]> skewnessFeatureVectors = new List <double[]>(); int c = 0; while (c + numFrames <= freqBandFeature.GetLength(0)) { // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.) 
List <double[]> sequencesOfFramesList = new List <double[]>(); for (int i = c; i < c + numFrames; i++) { sequencesOfFramesList.Add(freqBandFeature[i]); } List <double> min = new List <double>(); List <double> mean = new List <double>(); List <double> std = new List <double>(); List <double> max = new List <double>(); List <double> skewness = new List <double>(); double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) { double[] temp = new double[sequencesOfFrames.GetLength(0)]; for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) { temp[k] = sequencesOfFrames[k, j]; } min.Add(temp.GetMinValue()); mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); std.Add(AutoAndCrossCorrelation.GetStdev(temp)); max.Add(temp.GetMaxValue()); skewness.Add(temp.Skewness()); } minFeatureVectors.Add(min.ToArray()); meanFeatureVectors.Add(mean.ToArray()); maxFeatureVectors.Add(max.ToArray()); stdFeatureVectors.Add(std.ToArray()); skewnessFeatureVectors.Add(skewness.ToArray()); c += numFrames; } // when (freqBandFeature.GetLength(0) % numFrames) != 0, it means there are a number of frames (< numFrames) // (or the whole) at the end of the target recording , left unprocessed. // this would be problematic when an the resolution to generate the feature vector is 1 min, // but the the length of the target recording is a bit less than one min. 
if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1) { // First, make a list of patches that would be less than the required resolution List <double[]> sequencesOfFramesList = new List <double[]>(); int unprocessedFrames = freqBandFeature.GetLength(0) % numFrames; for (int i = freqBandFeature.GetLength(0) - unprocessedFrames; i < freqBandFeature.GetLength(0); i++) { sequencesOfFramesList.Add(freqBandFeature[i]); } List <double> min = new List <double>(); List <double> mean = new List <double>(); List <double> std = new List <double>(); List <double> max = new List <double>(); List <double> skewness = new List <double>(); double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix(); // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise for (int j = 0; j < sequencesOfFrames.GetLength(1); j++) { double[] temp = new double[sequencesOfFrames.GetLength(0)]; for (int k = 0; k < sequencesOfFrames.GetLength(0); k++) { temp[k] = sequencesOfFrames[k, j]; } min.Add(temp.GetMinValue()); mean.Add(AutoAndCrossCorrelation.GetAverage(temp)); std.Add(AutoAndCrossCorrelation.GetStdev(temp)); max.Add(temp.GetMaxValue()); skewness.Add(temp.Skewness()); } minFeatureVectors.Add(min.ToArray()); meanFeatureVectors.Add(mean.ToArray()); maxFeatureVectors.Add(max.ToArray()); stdFeatureVectors.Add(std.ToArray()); skewnessFeatureVectors.Add(skewness.ToArray()); } allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix()); allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix()); allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix()); allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix()); allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix()); } //***** // the keys of the following dictionaries contain file name // and their values are a list<double[,]> which the list.count is // the number of all subsegments for which features are extracted 
// the number of freq bands defined as an user-defined parameter. // the 2D-array is the feature vectors. allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors); allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors); allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors); allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors); allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors); // +++++++++++++++++++++++++++++++++++Temporal Summarization } } } // ++++++++++++++++++++++++++++++++++Writing features to one file // First, concatenate mean, max, std for each second. // Then, write the features of each pre-defined frequency band into a separate CSV file. var filesName = allFilesMeanFeatureVectors.Keys.ToArray(); var minFeatures = allFilesMinFeatureVectors.Values.ToArray(); var meanFeatures = allFilesMeanFeatureVectors.Values.ToArray(); var maxFeatures = allFilesMaxFeatureVectors.Values.ToArray(); var stdFeatures = allFilesStdFeatureVectors.Values.ToArray(); var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray(); // The number of elements in the list shows the number of freq bands // the size of each element in the list shows the number of files processed to generate feature for. 
// the dimensions of the matrix shows the number of feature vectors generated for each file and the length of feature vector var allMins = new List <double[][, ]>(); var allMeans = new List <double[][, ]>(); var allMaxs = new List <double[][, ]>(); var allStds = new List <double[][, ]>(); var allSkewness = new List <double[][, ]>(); // looping over freq bands for (int i = 0; i < meanFeatures[0].Count; i++) { var mins = new List <double[, ]>(); var means = new List <double[, ]>(); var maxs = new List <double[, ]>(); var stds = new List <double[, ]>(); var skewnesses = new List <double[, ]>(); // looping over all files for (int k = 0; k < meanFeatures.Length; k++) { mins.Add(minFeatures[k].ToArray()[i]); means.Add(meanFeatures[k].ToArray()[i]); maxs.Add(maxFeatures[k].ToArray()[i]); stds.Add(stdFeatures[k].ToArray()[i]); skewnesses.Add(skewnessFeatures[k].ToArray()[i]); } allMins.Add(mins.ToArray()); allMeans.Add(means.ToArray()); allMaxs.Add(maxs.ToArray()); allStds.Add(stds.ToArray()); allSkewness.Add(skewnesses.ToArray()); } // each element of meanFeatures array is a list of features for different frequency bands. 
// looping over the number of freq bands for (int i = 0; i < allMeans.ToArray().GetLength(0); i++) { // creating output feature file based on the number of freq bands var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv"); // creating the header for CSV file List <string> header = new List <string>(); header.Add("file name"); for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++) { header.Add("min" + j.ToString()); } for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++) { header.Add("mean" + j.ToString()); } for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++) { header.Add("max" + j.ToString()); } for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++) { header.Add("std" + j.ToString()); } for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++) { header.Add("skewness" + j.ToString()); } var csv = new StringBuilder(); string content = string.Empty; foreach (var entry in header.ToArray()) { content += entry.ToString() + ","; } csv.AppendLine(content); var allFilesFeatureVectors = new Dictionary <string, double[, ]>(); // looping over files for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++) { // concatenating mean, std, and max vector together for the pre-defined resolution List <double[]> featureVectors = new List <double[]>(); for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++) { List <double[]> featureList = new List <double[]> { allMins.ToArray()[i][j].ToJagged()[k], allMeans.ToArray()[i][j].ToJagged()[k], allMaxs.ToArray()[i][j].ToJagged()[k], allStds.ToArray()[i][j].ToJagged()[k], allSkewness.ToArray()[i][j].ToJagged()[k], }; double[] featureVector = DataTools.ConcatenateVectors(featureList); featureVectors.Add(featureVector); } allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix()); } // writing feature vectors to CSV file foreach (var entry in allFilesFeatureVectors) { content = string.Empty; content += entry.Key.ToString() 
+ ","; foreach (var cent in entry.Value) { content += cent.ToString() + ","; } csv.AppendLine(content); } File.WriteAllText(outputFeatureFile, csv.ToString()); } }
/// <summary>
/// Renders the "Friends" view and publishes the page title both to the view and as a response header.
/// </summary>
/// <param name="page">Page number; accepted for routing but not read by this action.</param>
/// <param name="row">Rows per page; accepted for routing but not read by this action.</param>
/// <returns>The result of <c>EnhancedView("Friends")</c>.</returns>
public IActionResult Friends(int page = 1, int row = 10)
{
    ViewBag.Title = DataTools.MakeWebTitle("友情链接");

    // Assign through the indexer rather than Headers.Add: Add throws an ArgumentException when a
    // "title" header is already present on the response, whereas the indexer simply overwrites it.
    HttpContext.Response.Headers["title"] = DataTools.MakeWebTitle("友情链接", true);

    return EnhancedView("Friends");
}
} //Execute()

/// <summary>
/// Reads the spectral-index CSV files of every instance and condenses each instance into one feature vector.
/// For each feature key the spectrogram rows are background-subtracted, reduced with
/// <c>MaxPoolingLimited</c>, summed over rows and divided by the number of rows.
/// Also writes the per-species instance counts to "BirdClef50_training_Counts.txt" in the output directory.
/// </summary>
/// <param name="arguments">
/// Supplies the instance and species counts, the species-labels file, the input and output directories
/// and the background threshold.
/// </param>
/// <returns>
/// An <c>Output</c> holding the file IDs, species IDs, per-species instance counts, per-instance frame counts,
/// the reduced spectral length, the feature weights and the instance-by-feature matrix.
/// </returns>
public static Output GetInstanceRepresentations(Arguments arguments)
{
    LoggedConsole.WriteLine("1. Read in all Instances and do feature extraction");

    //################################### FEATURE WEIGHTS
    // TRY DIFFERENT WEIGHTINGS assuming following "SPT,RHZ,RVT,RPS,RNG";
    bool doDeltaFeatures = false;
    double[] weights = { 1.0, 1.0, 0.8, 0.7, 0.7 };
    double[] deltaWeights = { 1.0, 1.0, 0.8, 0.7, 0.7, 0.5, 0.4, 0.4, 0.2, 0.2 };
    if (doDeltaFeatures)
    {
        weights = deltaWeights;
    }

    // MAX-POOLING for SPECTRAL REDUCTION
    // frequency bins used to reduce dimensionality of the 256 spectral values.
    int startBin = 8;
    int maxOf2Bin = 117;
    int maxOf3Bin = 160;
    int endBin = 200;

    // Pool a dummy 256-value spectrum once, only to learn the length of a reduced spectrum.
    double[] testArray = new double[256];
    for (int i = 0; i < testArray.Length; i++)
    {
        testArray[i] = i;
    }

    int reducedSpectralLength = MaxPoolingLimited(testArray, startBin, maxOf2Bin, maxOf3Bin, endBin).Length;
    LoggedConsole.WriteLine(" Reduced spectral length = " + reducedSpectralLength);

    int instanceCount = arguments.InstanceCount;
    int speciesCount = arguments.SpeciesCount;

    // READ IN THE SPECIES LABELS FILE AND SET UP THE DATA
    // Both arrays are produced by the reader through its out parameters, so nothing is pre-allocated here.
    string[] fileID;
    int[] speciesID;
    ReadGlotinsSpeciesLabelFile(arguments.SpeciesLabelsFile, instanceCount, out fileID, out speciesID);

    // INIT array of species counts
    int[] instanceNumbersPerSpecies = new int[speciesCount];

    // INIT array of frame counts
    int[] frameNumbersPerInstance = new int[instanceCount];

    // initialise instance description matrix
    var keyArray = FEATURE_KEYS.Split(',');
    int totalFeatureCount = keyArray.Length * reducedSpectralLength;

    // Routed through LoggedConsole like every other message in this method.
    LoggedConsole.WriteLine(" Total Feature Count = " + totalFeatureCount);
    if (doDeltaFeatures)
    {
        totalFeatureCount *= 2;
        LoggedConsole.WriteLine(" Total Delta Feature Count = " + totalFeatureCount);
    }

    // one matrix row per instance
    double[,] instanceFeatureMatrix = new double[instanceCount, totalFeatureCount];

    // loop through all instances
    for (int j = 0; j < instanceCount; j++)
    {
        LoggedConsole.Write(".");
        int frameCount = 0;
        int speciesLabel = speciesID[j];

        // summed feature spectra and summed absolute frame-to-frame differences, one entry per feature key
        var aggreDictionary = new Dictionary<string, double[]>();
        var deltaDictionary = new Dictionary<string, double[]>();

        foreach (string key in keyArray)
        {
            string name = string.Format("{0}_Species{1:d2}.{2}.csv", fileID[j], speciesLabel, key);
            FileInfo file = new FileInfo(Path.Combine(arguments.InputDataDirectory.FullName, name));
            if (!file.Exists)
            {
                // NOTE(review): no dictionary entry is created for this key, so the lookup further down
                // throws KeyNotFoundException for this instance. Behaviour deliberately left unchanged.
                continue;
            }

            int binCount;
            double[,] matrix = IndexMatrices.ReadSpectrogram(file, out binCount);

            double[] aggregateArray = new double[reducedSpectralLength];
            double[] deltaArray = new double[reducedSpectralLength];

            // Row 0 only seeds the "previous" spectrum for the delta calculation.
            // NOTE(review): row 0 is therefore not added to the aggregate, although the average below
            // divides by the full row count - confirm whether that is intended.
            double[] ipVector = MatrixTools.GetRow(matrix, 0);
            ipVector = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
            double[] previousArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);

            int rowCount = matrix.GetLength(0);
            for (int r = 1; r < rowCount; r++)
            {
                ipVector = MatrixTools.GetRow(matrix, r);
                ipVector = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
                double[] currentArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);

                for (int c = 0; c < reducedSpectralLength; c++)
                {
                    aggregateArray[c] += currentArray[c];

                    // delta = absolute change relative to the previous frame
                    deltaArray[c] += Math.Abs(currentArray[c] - previousArray[c]);
                }

                previousArray = currentArray;
            }

            aggreDictionary[key] = aggregateArray;
            deltaDictionary[key] = deltaArray;
            frameCount = rowCount;
        }

        instanceNumbersPerSpecies[speciesLabel - 1]++;
        frameNumbersPerInstance[j] += frameCount;

        // Row j of the matrix is the concatenation of the per-key AVERAGE spectra,
        // followed (only when delta features are enabled) by the per-key average deltas.
        for (int k = 0; k < keyArray.Length; k++)
        {
            double[] sums = aggreDictionary[keyArray[k]];
            int featureOffset = k * reducedSpectralLength;
            for (int c = 0; c < reducedSpectralLength; c++)
            {
                instanceFeatureMatrix[j, featureOffset + c] = sums[c] / frameCount;
            }
        }

        if (doDeltaFeatures)
        {
            for (int k = 0; k < keyArray.Length; k++)
            {
                double[] deltaSums = deltaDictionary[keyArray[k]];
                int featureOffset = (keyArray.Length + k) * reducedSpectralLength;
                for (int c = 0; c < reducedSpectralLength; c++)
                {
                    instanceFeatureMatrix[j, featureOffset + c] = deltaSums[c] / frameCount;
                }
            }
        }
    } // end for loop j over all instances

    LoggedConsole.WriteLine("Done!");
    LoggedConsole.WriteLine("\nSum of species number array = " + instanceNumbersPerSpecies.Sum());
    LoggedConsole.WriteLine("Sum of frame number array = " + frameNumbersPerInstance.Sum());

    bool addLineNumbers = true;
    string countsArrayOutputFilePath = Path.Combine(arguments.OutputDirectory.FullName, "BirdClef50_training_Counts.txt");
    FileTools.WriteArray2File(instanceNumbersPerSpecies, addLineNumbers, countsArrayOutputFilePath);

    // Assemble the output data
    Output output = new Output();
    output.FileID = fileID;
    output.SpeciesID = speciesID;
    output.InstanceNumbersPerSpecies = instanceNumbersPerSpecies;
    output.ReducedSpectralLength = reducedSpectralLength;
    output.FrameNumbersPerInstance = frameNumbersPerInstance;

    // matrix: each row = one instance; each column = one feature
    output.InstanceFeatureMatrix = instanceFeatureMatrix;
    output.Weights = weights;
    return output;
} // GetInstanceRepresentations()
/// <summary>
/// Keeps only those events whose spectral profile passes three flying-fox tests.
/// </summary>
/// <param name="events">unfiltered acoustic events.</param>
/// <param name="sonogram">supplies the spectrogram matrix and the frequency bin width.</param>
/// <returns>a new list with the events that passed all three tests, in their original order.</returns>
private static List<AcousticEvent> FilterEventsForSpectralProfile(List<AcousticEvent> events, BaseSonogram sonogram)
{
    double[,] data = sonogram.Data;

    // Frequency bins that demarcate the bands used by the tests.
    // The hertz values are hard coded but could be moved to the config.yml file.
    int bin8kHz = (int)Math.Round(8000 / sonogram.FBinWidth);
    int bin4kHz = (int)Math.Round(4000 / sonogram.FBinWidth);
    int bin1kHz = (int)Math.Round(1000 / sonogram.FBinWidth);

    // Returns true when the event's smoothed, normalised spectrum passes all three tests.
    bool HasFlyingFoxProfile(AcousticEvent acousticEvent)
    {
        // The profile is taken from the four rows that follow the event's first frame
        // (rows RowTop+1 .. RowTop+4), restricted to columns 0 .. bin8kHz.
        int firstFrame = acousticEvent.Oblong.RowTop;
        var frames = DataTools.Submatrix(data, firstFrame + 1, 0, firstFrame + 4, bin8kHz);
        var profile = DataTools.normalise(MatrixTools.GetColumnAverages(frames));
        profile = DataTools.filterMovingAverageOdd(profile, 11);
        int peakBin = DataTools.GetMaxIndex(profile);
        double totalArea = profile.Sum();

        // Test 1: the spectral maximum should be below 4 kHz.
        bool peakBelow4kHz = peakBin < bin4kHz;

        // Test 2: there should be little energy in the 0-1 kHz band.
        // 0.125  = 1/8.  i.e. energy in 0-1kHz band less than the average of all 8 kHz bands
        // 0.0938 = 3/32. i.e. energy in 0-1kHz band less than 3/4 of that average
        // 0.0625 = 1/16. i.e. energy in 0-1kHz band less than half of that average
        // The comparison is written as "exceeds" so that a NaN ratio does not count as a failure.
        double lowBandRatio = DataTools.Subarray(profile, 0, bin1kHz).Sum() / totalArea;
        bool lowBandTooLoud = lowBandRatio > 0.1;

        // Test 3: there should be little energy in the 4-5 kHz band.
        double midBandRatio = DataTools.Subarray(profile, bin4kHz, bin1kHz).Sum() / totalArea;
        bool midBandTooLoud = midBandRatio > 0.125;

        return peakBelow4kHz && !lowBandTooLoud && !midBandTooLoud;
    }

    // TODO write method to determine similarity of spectrum to a true flying fox spectrum.
    // Problem: it is not certain how variable the FF spectra are.
    // In ten minutes of recording used so far, which include 14-15 obvious calls, there appear to be two spectral types.
    // One type has three peaks at around 1.5 kHz, 3 kHz and 6 kHz.
    // The other type has two peaks around 2.5 and 5.5 kHz.
    var accepted = new List<AcousticEvent>();
    foreach (AcousticEvent candidate in events)
    {
        if (HasFlyingFoxProfile(candidate))
        {
            accepted.Add(candidate);
        }
    }

    return accepted;
}
} //PruneClusters2()

/// <summary>
/// Similarity of two vectors, computed as one minus their fractional Hamming distance.
/// </summary>
/// <param name="v1">first vector; its length is the denominator of the fraction.</param>
/// <param name="v2">second vector.</param>
/// <returns>
/// 1 - (Hamming distance / v1.Length). This lies in 0-1 provided the distance reported by
/// <c>DataTools.HammingDistance</c> is between 0 and v1.Length.
/// </returns>
public static double HammingSimilarity(double[] v1, double[] v2)
{
    int differingPositions = DataTools.HammingDistance(v1, v2);
    double vectorLength = v1.Length;
    double fractionalDistance = differingPositions / vectorLength;
    return 1 - fractionalDistance;
}
/// <summary>
/// Searches one audio segment for wing-beat events using the oscillation detector.
/// </summary>
/// <param name="audioRecording">the recording.</param>
/// <param name="configuration">the config file.</param>
/// <param name="profileName">name of call/event type to be found.</param>
/// <param name="segmentStartOffset">where one segment is located in the total recording.</param>
/// <returns>the detected events, two plots (band intensity and oscillation score) and the sonogram.</returns>
/// <exception cref="InvalidOperationException">no profile named <paramref name="profileName"/> could be obtained from the configuration.</exception>
private static RecognizerResults WingBeats(AudioRecording audioRecording, Config configuration, string profileName, TimeSpan segmentStartOffset)
{
    // Fail with a clear message instead of a NullReferenceException on the first profile access below.
    ConfigFile.TryGetProfile(configuration, profileName, out var profile);
    if (profile == null)
    {
        throw new InvalidOperationException($"Profile '{profileName}' was not found in the configuration.");
    }

    // get the common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "Pteropus species";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "Pteropus";

    // The following parameters worked well on a ten minute recording containing 14-16 calls.
    // Note: if you lower the dB threshold, you need to increase maxDurationSeconds
    int minHz = profile.GetIntOrNull(AnalysisKeys.MinHz) ?? 100;
    int maxHz = profile.GetIntOrNull(AnalysisKeys.MaxHz) ?? 3000;
    double minDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MinDuration) ?? 1.0;
    double maxDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MaxDuration) ?? 10.0;
    double decibelThreshold = profile.GetDoubleOrNull("DecibelThreshold") ?? 6.0;
    double dctDuration = profile.GetDoubleOrNull("DctDuration") ?? 1.0;
    double dctThreshold = profile.GetDoubleOrNull("DctThreshold") ?? 0.5;
    double minOscFreq = profile.GetDoubleOrNull("MinOscilFreq") ?? 4.0;
    double maxOscFreq = profile.GetDoubleOrNull("MaxOscilFreq") ?? 6.0;
    double eventThreshold = profile.GetDoubleOrNull("EventThreshold") ?? 0.3;

    // This recognizer does not use the raw samples; each segment is converted to a spectrogram instead.
    var sonogram = GetSonogram(configuration, audioRecording);
    var decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, minHz, maxHz, sonogram.NyquistFrequency);

    // Look for wing beats using the oscillation detector.
    // Oscillations2012 takes no decibel threshold, so decibelThreshold is used only for the intensity plot.
    // (An Oscillations2019.Execute variant that also takes the decibel threshold and a score smoothing
    // window of 11 was tried here previously.)
    // The hits matrix returned by the detector is not used.
    Oscillations2012.Execute(
        (SpectrogramStandard)sonogram,
        minHz,
        maxHz,
        dctDuration,
        (int)Math.Floor(minOscFreq),
        (int)Math.Floor(maxOscFreq),
        dctThreshold,
        eventThreshold,
        minDurationSeconds,
        maxDurationSeconds,
        out var scores,
        out var acousticEvents,
        out _,
        segmentStartOffset);

    // prepare plots: intensity is normalised against three times the decibel threshold
    double intensityNormalisationMax = 3 * decibelThreshold;
    var normThreshold = decibelThreshold / intensityNormalisationMax;
    var normalisedIntensityArray = DataTools.NormaliseInZeroOne(decibelArray, 0, intensityNormalisationMax);
    var plot1 = new Plot(speciesName + " Wing-beat band", normalisedIntensityArray, normThreshold);
    var plot2 = new Plot(speciesName + " Wing-beat Osc Score", scores, eventThreshold);
    var plots = new List<Plot> { plot1, plot2 };

    // add additional information about the recording and sonogram properties from which each event is derived.
    var frameOffset = sonogram.FrameStep;
    var frameDuration = sonogram.FrameDuration;
    string eventName = abbreviatedSpeciesName + profileName;
    foreach (var ae in acousticEvents)
    {
        ae.FileName = audioRecording.BaseName;
        ae.SpeciesName = speciesName;
        ae.Name = eventName;
        ae.Profile = profileName;
        ae.SegmentDurationSeconds = audioRecording.Duration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SetTimeAndFreqScales(frameOffset, frameDuration, sonogram.FBinWidth);
    }

    return new RecognizerResults()
    {
        Events = acousticEvents,
        Hits = null,
        ScoreTrack = null,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// Trains the network by presenting the training set repeatedly until every signal is assigned to the same
/// output node as in the previous pass, or until <paramref name="maxIter"/> passes have been made.
/// </summary>
/// <param name="trainingData">the input signals.</param>
/// <param name="maxIter">maximum number of passes over the training set.</param>
/// <param name="seed">seed used to randomise the presentation order.</param>
/// <param name="initialWtCount">passed through to the weight initialisation.</param>
/// <returns>
/// The number of passes made, the number of committed F2 nodes, the winning output node
/// for each signal, and the weight vectors.
/// </returns>
public Tuple<int, int, int[], List<double[]>> TrainNet(List<double[]> trainingData, int maxIter, int seed, int initialWtCount)
{
    int signalCount = trainingData.Count;

    // randomised presentation order of the training set
    int[] shuffledOrder = RandomNumber.RandomizeNumberOrder(signalCount, seed);

    // winning output node of every signal in the current and in the previous pass
    int[] inputCategory = new int[signalCount];
    int[] prevCategory = new int[signalCount];

    this.InitialiseWtArrays(trainingData, shuffledOrder, initialWtCount);

    int iterNum = 0;
    bool trainSetLearned = false;

    // One pass per loop; the loop ends as soon as a pass leaves every allocation unchanged.
    while (!trainSetLearned && iterNum < maxIter)
    {
        iterNum++;

        // number of wins of each output node during this pass
        var opNodeWins = new int[this.OPSize];

        // assume convergence until a signal changes its output node
        trainSetLearned = true;
        int changedCategory = 0;

        for (int sigNum = 0; sigNum < signalCount; sigNum++)
        {
            // present the signals in order, or in the shuffled order
            int sigID = RandomiseTrnSetOrder ? shuffledOrder[sigNum] : sigNum;
            double[] signal = trainingData[sigID];

            // pass one input signal through the network
            double[] outputs = this.PropagateIP2OP(signal);
            int winner = DataTools.GetMaxIndex(outputs);

            // commit a new category when even the best matching node is not similar enough
            if (outputs[winner] < this.VigilanceRho)
            {
                this.ChangeWtsOfFirstUncommittedNode(signal);
            }

            inputCategory[sigID] = winner;
            opNodeWins[winner]++;

            // the set is not yet learned if this signal moved to a different node
            if (prevCategory[sigID] != winner)
            {
                trainSetLearned = false;
                changedCategory++;
            }
        }

        // remember this pass's allocations for the next convergence test
        Array.Copy(inputCategory, prevCategory, signalCount);

        // release committed F2 nodes that did not win a single signal
        for (int node = 0; node < this.OPSize; node++)
        {
            if (opNodeWins[node] == 0 && this.committedNode[node])
            {
                this.committedNode[node] = false;
            }
        }

        if (Verbose)
        {
            string format = " iter={0:D2} committed=" + this.CountCommittedF2Nodes() + "\t changedCategory=" + changedCategory;
            LoggedConsole.WriteLine(format, iterNum);
        }
    }

    return Tuple.Create(iterNum, this.CountCommittedF2Nodes(), inputCategory, this.wts);
} //TrainNet()
/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Band-pass filters the waveform, picks local amplitude maxima in blocks of 1000 samples, scores the
/// spectrum around each maximum and reports every location whose score exceeds the score threshold
/// as a "grunt" event.
/// </summary>
/// <param name="audioRecording">the recording segment to analyse.</param>
/// <param name="configuration">supplies the event threshold and the noise background threshold.</param>
/// <param name="segmentStartOffset">start of this segment within the whole recording.</param>
/// <param name="getSpectralIndexes">not read by this recognizer.</param>
/// <param name="outputDirectory">directory under which the debug images are written.</param>
/// <param name="imageWidth">not read by this recognizer.</param>
/// <returns>the events, a score plot and a standard spectrogram of the segment.</returns>
public override RecognizerResults Recognize(AudioRecording audioRecording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    const double minAmplitudeThreshold = 0.1;
    const int percentile = 5;
    const double scoreThreshold = 0.3;
    const bool doFiltering = true;
    const int windowWidth = 1024;
    const int signalBuffer = windowWidth * 2;

    int sr = audioRecording.SampleRate;
    int nyquist = audioRecording.Nyquist;

    // min score for an acceptable event.
    // There is no default: the cast throws if the key is absent from the config.
    double eventThreshold = (double)configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold);

    // get samples
    var samples = audioRecording.WavReader.Samples;
    double[] bandPassFilteredSignal = null;

    if (doFiltering)
    {
        // high pass filter
        int windowLength = 71;
        double[] highPassFilteredSignal;
        DSP_IIRFilter.ApplyMovingAvHighPassFilter(samples, windowLength, out highPassFilteredSignal);

        // Amplify and clip. An amplitude factor of 100 corresponds to 40 dB (20 * log10(100)).
        double factor = 100;
        highPassFilteredSignal = DspFilters.AmplifyAndClip(highPassFilteredSignal, factor);

        // low pass filter
        string filterName = "Chebyshev_Lowpass_5000, scale*5";
        DSP_IIRFilter filter = new DSP_IIRFilter(filterName);
        filter.ApplyIIRFilter(highPassFilteredSignal, out bandPassFilteredSignal);
    }
    else
    {
        // do not filter because already filtered - using Chris's filtered recording
        bandPassFilteredSignal = samples;
    }

    // calculate an amplitude threshold that is above the Nth percentile of wave amplitudes in the signal
    int[] histogramOfAmplitudes;
    double minAmplitude;
    double maxAmplitude;
    double binWidth;
    int window = 66;
    Histogram.GetHistogramOfWaveAmplitudes(bandPassFilteredSignal, window, out histogramOfAmplitudes, out minAmplitude, out maxAmplitude, out binWidth);
    int percentileBin = Histogram.GetPercentileBin(histogramOfAmplitudes, percentile);

    double amplitudeThreshold = (percentileBin + 1) * binWidth;
    if (amplitudeThreshold < minAmplitudeThreshold)
    {
        amplitudeThreshold = minAmplitudeThreshold;
    }

    bool doAnalysisOfKnownExamples = true;
    if (doAnalysisOfKnownExamples)
    {
        // Draw the waveform and score track around each manually annotated grunt.
        // NOTE(review): the annotations are read from a fixed local path; this section throws when
        // that file is not available - confirm whether it should remain enabled.
        var filePath = new FileInfo(@"C:\SensorNetworks\WavFiles\Freshwater\GruntSummaryRevisedAndEditedByMichael.csv");
        List<CatFishCallData> data = Csv.ReadFromCsv<CatFishCallData>(filePath, true).ToList();

        var subSamplesDirectory = outputDirectory.CreateSubdirectory("testSubsamples_5000LPFilter");

        foreach (var fishCall in data)
        {
            // assume the annotated time points to the centre of the grunt
            int location = (int)Math.Round(fishCall.TimeSeconds * sr);
            double[] subsample = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 2 * signalBuffer);

            double[] scores1 = AnalyseWaveformAtLocation(subsample, amplitudeThreshold, scoreThreshold);
            string title1 = $"scores={fishCall.Timehms}";
            Image bmp1 = GraphsAndCharts.DrawGraph(title1, scores1, subsample.Length, 300, 1);

            string title2 = $"tStart={fishCall.Timehms}";
            Image bmp2 = GraphsAndCharts.DrawWaveform(title2, subsample, 1);

            // waveform on top, score track underneath
            var path1 = subSamplesDirectory.CombineFile($"scoresForTestSubsample_{fishCall.TimeSeconds}secs.png");
            Image[] imageList = { bmp2, bmp1 };
            Image bmp3 = ImageTools.CombineImagesVertically(imageList);
            bmp3.Save(path1.FullName);
        }
    }

    int signalLength = bandPassFilteredSignal.Length;

    // find the maximum absolute amplitude, and its sample index, in each block of 1000 samples
    int blockLength = 1000;
    int blockCount = signalLength / blockLength;
    int[] indexOfMax = new int[blockCount];
    double[] maxInBlock = new double[blockCount];

    for (int i = 0; i < blockCount; i++)
    {
        double max = -2.0;
        int blockStart = blockLength * i;
        for (int s = 0; s < blockLength; s++)
        {
            double absValue = Math.Abs(bandPassFilteredSignal[blockStart + s]);
            if (absValue > max)
            {
                max = absValue;
                maxInBlock[i] = max;
                indexOfMax[i] = blockStart + s;
            }
        }
    }

    // keep only the blocks whose maximum exceeds that of both neighbouring blocks
    var indexList = new List<int>();
    for (int i = 1; i < blockCount - 1; i++)
    {
        if (maxInBlock[i] > maxInBlock[i - 1] && maxInBlock[i] > maxInBlock[i + 1])
        {
            indexList.Add(indexOfMax[i]);
        }
    }

    // now process the neighbourhood of each max
    int binCount = windowWidth / 2;
    FFT.WindowFunc wf = FFT.Hamming;
    var fft = new FFT(windowWidth, wf);
    int maxHz = 1000;
    double hzPerBin = nyquist / (double)binCount;
    int requiredBinCount = (int)Math.Round(maxHz / hzPerBin);

    // init list of events and the score array (one score per sample)
    List<AcousticEvent> events = new List<AcousticEvent>();
    double[] scores = new double[signalLength];

    foreach (int location in indexList)
    {
        // skip maxima that are too close to either end for a full window centred on them
        int start = location - binCount;
        if (start < 0)
        {
            continue;
        }

        int end = location + binCount;
        if (end >= signalLength)
        {
            continue;
        }

        double[] subsampleWav = DataTools.Subarray(bandPassFilteredSignal, start, windowWidth);

        var spectrum = fft.Invoke(subsampleWav);

        // convert to power
        spectrum = DataTools.SquareValues(spectrum);
        spectrum = DataTools.filterMovingAverageOdd(spectrum, 3);
        spectrum = DataTools.normalise(spectrum);
        var subBandSpectrum = DataTools.Subarray(spectrum, 1, requiredBinCount); // ignore DC in bin zero.

        // now do some tests on the spectrum to determine if it is a candidate grunt
        double[] scoreArray = CalculateScores(subBandSpectrum, windowWidth);
        double score = scoreArray[0];
        if (score <= scoreThreshold)
        {
            continue;
        }

        // Mark the whole analysis window in the score track.
        // The loop previously wrote scores[location] on every pass, so only one sample was ever set.
        for (int i = start; i < end; i++)
        {
            scores[i] = score;
        }

        var startTime = TimeSpan.FromSeconds(start / (double)sr);
        string startLabel = startTime.Minutes + "." + startTime.Seconds + "." + startTime.Milliseconds;
        Image image4 = GraphsAndCharts.DrawWaveAndFft(subsampleWav, sr, startTime, spectrum, maxHz * 2, scoreArray);

        var path4 = outputDirectory.CreateSubdirectory("subsamples").CombineFile($@"subsample_{location}_{startLabel}.png");
        image4.Save(path4.FullName);

        // have an event, store the data in the AcousticEvent class
        double duration = 0.2;
        int minFreq = 50;
        int maxFreq = 1000;
        var anEvent = new AcousticEvent(segmentStartOffset, startTime.TotalSeconds, duration, minFreq, maxFreq);
        anEvent.Name = "grunt";
        anEvent.Score = score;
        events.Add(anEvent);
    }

    // make a spectrogram
    var config = new SonogramConfig
    {
        NoiseReductionType = NoiseReductionType.Standard,
        NoiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.0,
    };
    var sonogram = (BaseSonogram)new SpectrogramStandard(config, audioRecording.WavReader);

    var plot = new Plot(this.DisplayName, scores, eventThreshold);
    return new RecognizerResults()
    {
        Events = events,
        Hits = null,
        Plots = plot.AsList(),
        Sonogram = sonogram,
    };
}
/// <summary>
/// The CORE ANALYSIS METHOD.
/// Makes a noise-reduced spectrogram of the recording segment, passes a 64-bin band starting just above
/// MIN_HZ to <c>CrossCorrelation.DetectHarmonicsInSonogramMatrix</c>, and converts every frame that
/// passes the intensity, formant-gap and dominant-frequency tests into an acoustic event.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment to analyse.</param>
/// <param name="configDict">
/// Analysis settings as text. Required keys: MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, DECIBEL_THRESHOLD,
/// INTENSITY_THRESHOLD and CALL_DURATION. The frame length key is optional and defaults to 1024.
/// Numbers are parsed with the invariant culture, i.e. "." is the decimal separator.
/// </param>
/// <param name="segmentStartOffset">Start offset of the segment; handed to every created event.</param>
/// <returns>
/// The spectrogram, a hits matrix (same dimensions as the spectrogram data), a plot of harmonic intensity,
/// the predicted events and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Config values are machine-readable text. Parsing them with the current culture would misread
    // or reject "0.5" on machines whose locale uses a decimal comma.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //set default values
    int frameLength = 1024;
    if (configDict.TryGetValue(AnalysisKeys.FrameLength, out string frameLengthText))
    {
        frameLength = int.Parse(frameLengthText, invariant);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"], invariant);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"], invariant);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"], invariant);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"], invariant); //dB
    double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    }; //default values config

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // Frames per second equals the bin width only because windowOverlap is zero (frame step == window size).
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    //#############################################################################################################################################
    //ii: DETECT HARMONICS
    var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);

    // results.Item1 (the decibel array) is not needed by this method.
    double[] intensity = results.Item2; //an array of periodicity scores
    double[] periodicity = results.Item3;

    //intensity = DataTools.filterMovingAverage(intensity, 3);
    int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < harmonicIntensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        //expect humans to have max < 1000 Hz
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < noiseBound; j++)
        {
            spectrum[j] = 0.0;
        }

        int maxIndex = DataTools.GetMaxIndex(spectrum);
        int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);

        // Frames whose dominant frequency is below 1200 Hz are scored zero.
        double discount = 1.0;
        if (freqWithMaxPower < 1200)
        {
            discount = 0.0;
        }

        // Strictly greater: a frame whose intensity exactly equals the threshold keeps a zero score.
        if (intensity[r] > harmonicIntensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        for (int c = minBin; c < maxbin; c++)
        {
            //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
            hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxPossibleScore = 0.5;
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List<AcousticEvent>();
    for (int i = 0; i < rowCount; i++)
    {
        //assume one score position per crow call
        if (scoreArray[i] < 0.001)
        {
            continue;
        }

        // The event is centred on the scoring frame, so it starts half a call span earlier.
        double startTime = (i - halfCallSpan) / framesPerSecond;
        AcousticEvent ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        ev.Score = scoreArray[i];
        ev.ScoreNormalised = ev.Score / maxPossibleScore; // normalised to the user supplied threshold

        //ev.Score_MaxPossible = maxPossibleScore;
        predictedEvents.Add(ev);
    } //for loop

    Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// Writes <paramref name="value"/> to <paramref name="stringBuilder"/> by delegating to
/// <c>DataTools.ConvertStringToSql</c> with "||" and "'" as the two string arguments and
/// <c>AppendConversion</c> as the conversion callback.
/// </summary>
static void ConvertStringToSql(StringBuilder stringBuilder, string value)
    => DataTools.ConvertStringToSql(stringBuilder, "||", "'", AppendConversion, value);
/// <summary>
/// Writes <paramref name="value"/> to <paramref name="stringBuilder"/> by delegating to
/// <c>DataTools.ConvertStringToSql</c> with "||" as the first string argument, no second string argument (null),
/// <c>AppendConversion</c> as the conversion callback and <c>_extraEscapes</c> as the trailing argument.
/// </summary>
static void ConvertStringToSql(StringBuilder stringBuilder, string value)
    => DataTools.ConvertStringToSql(stringBuilder, "||", null, AppendConversion, value, _extraEscapes);
///// <summary> ///// This method merges all files of acoustic indices derived from a sequence of consecutive 6 hour recording, ///// that have a total duration of 24 hours. This was necesarry to deal with Jason's new regime of doing 24 hour recordings ///// in blocks of 6 hours. ///// </summary> //public static void ConcatenateSpectralIndexFiles1() //{ // // create an array that contains the names of csv file to be read. // // The file names must be in the temporal order rquired for the resulting spectrogram image. // string topLevelDirectory = @"C:\SensorNetworks\Output\SERF\SERFIndices_2013April01"; // string fileStem = "SERF_20130401"; // string[] names = {"SERF_20130401_000025_000", // "SERF_20130401_064604_000", // "SERF_20130401_133143_000", // "SERF_20130401_201721_000", // }; // //string topLevelDirectory = @"C:\SensorNetworks\Output\SERF\SERFIndices_2013June19"; // //string fileStem = "SERF_20130619"; // //string[] names = {"SERF_20130619_000038_000", // // "SERF_20130619_064615_000", // // "SERF_20130619_133153_000", // // "SERF_20130619_201730_000", // // }; // // ############################################################### // // VERY IMPORTANT: MUST MAKE SURE THE BELOW ARE CONSISTENT WITH THE DATA !!!!!!!!!!!!!!!!!!!! 
// int sampleRate = 17640; // int frameWidth = 256; // // ############################################################### // string[] level2Dirs = {names[0]+".wav", // names[1]+".wav", // names[2]+".wav", // names[3]+".wav", // }; // string level3Dir = "Towsey.Acoustic"; // string[] dirNames = {topLevelDirectory+@"\"+level2Dirs[0]+@"\"+level3Dir, // topLevelDirectory+@"\"+level2Dirs[1]+@"\"+level3Dir, // topLevelDirectory+@"\"+level2Dirs[2]+@"\"+level3Dir, // topLevelDirectory+@"\"+level2Dirs[3]+@"\"+level3Dir // }; // string[] fileExtentions = { ".ACI.csv", // ".AVG.csv", // ".BGN.csv", // ".CVR.csv", // ".TEN.csv", // ".VAR.csv", // "_Towsey.Acoustic.Indices.csv" // }; // // this loop reads in all the Indices from consecutive csv files // foreach (string extention in fileExtentions) // { // Console.WriteLine("\n\nFILE TYPE: " + extention); // List<string> lines = new List<string>(); // for (int i = 0; i < dirNames.Length; i++) // { // string fName = names[i] + extention; // string path = Path.Combine(dirNames[i], fName); // var fileInfo = new FileInfo(path); // Console.WriteLine(path); // if (!fileInfo.Exists) // Console.WriteLine("ABOVE FILE DOES NOT EXIST"); // var ipLines = FileTools.ReadTextFile(path); // if (i != 0) // { // ipLines.RemoveAt(0); //remove the first line // } // lines.AddRange(ipLines); // } // string opFileName = fileStem + extention; // string opPath = Path.Combine(topLevelDirectory, opFileName); // FileTools.WriteTextFile(opPath, lines, false); // } //end of all file extentions // TimeSpan minuteOffset = TimeSpan.Zero; // assume recordings start at midnight // TimeSpan xScale = TimeSpan.FromMinutes(60); // double backgroundFilterCoeff = SpectrogramConstants.BACKGROUND_FILTER_COEFF; // string colorMap = SpectrogramConstants.RGBMap_ACI_ENT_CVR; // var cs1 = new LDSpectrogramRGB(minuteOffset, xScale, sampleRate, frameWidth, colorMap); // cs1.BaseName = fileStem; // cs1.ColorMode = colorMap; // cs1.BackgroundFilter = backgroundFilterCoeff; // 
//    var dirInfo = new DirectoryInfo(topLevelDirectory);
//    cs1.ReadSpectralIndices(dirInfo, fileStem); // reads all known indices files
//    if (cs1.GetCountOfSpectrogramMatrices() == 0)
//    {
//        Console.WriteLine("There are no spectrogram matrices in the dictionary.");
//        return;
//    }
//    cs1.DrawGreyScaleSpectrograms(dirInfo, fileStem);

//    colorMap = SpectrogramConstants.RGBMap_ACI_ENT_CVR;
//    Image image1 = cs1.DrawFalseColourSpectrogram("NEGATIVE", colorMap);
//    int nyquist = cs1.SampleRate / 2;
//    int herzInterval = 1000;

//    string title = String.Format("FALSE-COLOUR SPECTROGRAM: {0}      (scale:hours x kHz)       (colour: R-G-B={1})", fileStem, colorMap);
//    Image titleBar = LDSpectrogramRGB.DrawTitleBarOfFalseColourSpectrogram(title, image1.Width);
//    image1 = LDSpectrogramRGB.FrameLDSpectrogram(image1, titleBar, minuteOffset, cs1.IndexCalculationDuration, cs1.XTicInterval, nyquist, herzInterval);
//    image1.Save(Path.Combine(dirInfo.FullName, fileStem + "." + colorMap + ".png"));

//    colorMap = "BGN-AVG-VAR";
//    Image image2 = cs1.DrawFalseColourSpectrogram("NEGATIVE", colorMap);
//    title = String.Format("FALSE-COLOUR SPECTROGRAM: {0}      (scale:hours x kHz)       (colour: R-G-B={1})", fileStem, colorMap);
//    titleBar = LDSpectrogramRGB.DrawTitleBarOfFalseColourSpectrogram(title, image2.Width);
//    image2 = LDSpectrogramRGB.FrameLDSpectrogram(image2, titleBar, minuteOffset, cs1.IndexCalculationDuration, cs1.XTicInterval, nyquist, herzInterval);
//    image2.Save(Path.Combine(dirInfo.FullName, fileStem + "." + colorMap + ".png"));
//    Image[] array = new Image[2];
//    array[0] = image1;
//    array[1] = image2;
//    Image image3 = ImageTools.CombineImagesVertically(array);
//    image3.Save(Path.Combine(dirInfo.FullName, fileStem + ".2MAPS.png"));
//}

/// <summary>
/// This method rearranges the content of a false-colour spectrogram according to the acoustic cluster or acoustic state to which each minute belongs.
/// Image columns are regrouped cluster by cluster (largest cluster first, as ranked by DataTools.SortArray)
/// and, within a cluster, in order of SOM node number.
/// The time scale is added in afterwards - must overwrite the previous time scale and title bar.
/// This method was written to examine the cluster content of recordings analysed by Mangalam using a 10x10 SOM.
/// The output image was used in the paper presented by Mangalam to BDVA2015 in Tasmania. (Big data, visual analytics).
/// All input and output paths are hard coded. If the input image does not exist the method reports it and returns.
/// </summary>
public static void ExtractSOMClusters1()
{
    string opDir = @"C:\SensorNetworks\Output\Mangalam_BDVA2015\";

    //string fileStem = @"BYR2_20131016";
    //string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_BDVA2015\BYR2_20131016.ACI-ENT-EVN.png";
    //string clusterFile = opDir + "SE 13 Oct - Cluster-node list.csv";

    //string fileStem = @"BYR2_20131017";
    //string inputImagePath = opDir + fileStem + ".ACI-ENT-EVN.png";
    //string clusterFile = opDir + "BY2-17Oct - node_clus_map.csv";

    string fileStem = @"SERF-SE_20101013";
    string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_BDVA2015\SERF-SE_20101013.ACI-ENT-EVN.png";
    string clusterFile = opDir + "SE 13 Oct - Cluster-node list.csv";

    string opFileName = fileStem + ".SOMClusters.png";

    int clusterCount = 27; // from fuzzy c-clustering
    int nodeCount = 100; // from the 10x10 SOM

    // NOTE(review): ownership of the palette pens is not visible from here (they could be shared
    // system pens, which must not be disposed), so they are deliberately left undisposed.
    List<Pen> pens = ImageTools.GetColorPalette(clusterCount);

    //SizeF stringSize = new SizeF();
    //Font stringFont = new Font("Tahoma", 9);

    // ###############################################################
    // VERY IMPORTANT:  MUST MAKE SURE THE BELOW ARE CONSISTENT WITH THE DATA !!!!!!!!!!!!!!!!!!!!
    // Most of these are only referenced by the commented-out time-scale code at the end of the method.
    int sampleRate = 22050;
    int frameWidth = 256;
    int nyquist = sampleRate / 2;
    int herzInterval = 1000;
    TimeSpan minuteOffset = TimeSpan.Zero; // assume recordings start at midnight
    double backgroundFilterCoeff = SpectrogramConstants.BACKGROUND_FILTER_COEFF;
    string colorMap = SpectrogramConstants.RGBMap_ACI_ENT_EVN;
    string title = string.Format("SOM CLUSTERS of ACOUSTIC INDICES: recording {0}", fileStem);
    TimeSpan indexCalculationDuration = TimeSpan.FromSeconds(60); // seconds
    TimeSpan xTicInterval = TimeSpan.FromMinutes(60); // 60 minutes or one hour.
    int trackheight = 20;

    // ###############################################################

    // read in the assignment of cluster numbers to cluster LABEL
    string[] clusterLabel = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a" };

    // read the data file
    List<string> lines = FileTools.ReadTextFile(clusterFile);
    int lineCount = lines.Count;

    // Parse every line once. Column 1 holds the node number and column 2 the cluster number;
    // both are stored zero-based (-1 because matlab arrays start at 1).
    int[] clusterOfLine = new int[lineCount];
    int[] nodeOfLine = new int[lineCount];
    int[] clusterHistogram = new int[clusterCount];
    for (int lineNumber = 0; lineNumber < lineCount; lineNumber++)
    {
        string[] words = lines[lineNumber].Split(',');
        clusterOfLine[lineNumber] = int.Parse(words[2]) - 1;
        nodeOfLine[lineNumber] = int.Parse(words[1]) - 1;
        clusterHistogram[clusterOfLine[lineNumber]]++;
    }

    // ranks cluster counts in descending order
    Tuple<int[], int[]> tuple = DataTools.SortArray(clusterHistogram);
    int[] sortOrder = tuple.Item1;

    //read in the image
    FileInfo fi = new FileInfo(inputImagePath);
    if (!fi.Exists)
    {
        // Nothing can be drawn without the source image, so stop here instead of failing in the reader.
        Console.WriteLine("\n\n >>>>>>>> FILE DOES NOT EXIST >>>>>>: " + fi.Name);
        return;
    }

    Console.WriteLine("Reading file: " + fi.Name);

    // Every GDI+ object owned by this method is released when the using block exits.
    using (Pen whitePen = new Pen(Color.White))
    using (Pen blackPen = new Pen(Color.Black))
    using (Font stringFont = new Font("Arial", 12, FontStyle.Bold))
    using (Bitmap ipImage = ImageTools.ReadImage2Bitmap(fi.FullName))
    using (Image opImage = new Bitmap(ipImage.Width, ipImage.Height))
    using (Graphics gr = Graphics.FromImage(opImage))
    {
        int imageWidth = ipImage.Width;
        int imageHt = ipImage.Height;

        //init the output image
        gr.Clear(Color.Black);

        // this loop draws the clusters one after another, from left to right, largest cluster first
        int opColumn = 0;
        for (int id = 0; id < clusterCount; id++)
        {
            int sortID = sortOrder[id];
            int clusterStartColumn = opColumn;

            // for every node, the source columns (one column per line of the data file) that belong to this cluster
            List<int>[] columnsOfNode = new List<int>[nodeCount];
            for (int n = 0; n < nodeCount; n++)
            {
                columnsOfNode[n] = new List<int>();
            }

            Console.WriteLine("Reading CLUSTER: " + (sortID + 1) + "  Label=" + clusterLabel[sortID]);

            // read through the entire list of minutes
            for (int lineNumber = 0; lineNumber < lineCount; lineNumber++)
            {
                if (clusterOfLine[lineNumber] == sortID)
                {
                    columnsOfNode[nodeOfLine[lineNumber]].Add(lineNumber);
                }
            }

            // cycle through the nodes and draw the column images in order of node number
            for (int n = 0; n < nodeCount; n++)
            {
                foreach (int sourceColumn in columnsOfNode[n])
                {
                    // get image column, draw it, and release it straight away
                    Rectangle rectangle = new Rectangle(sourceColumn, 0, 1, imageHt);
                    using (Bitmap column = ipImage.Clone(rectangle, ipImage.PixelFormat))
                    {
                        gr.DrawImage(column, opColumn, 0);
                    }

                    // mark the cluster colour in a track near the top and in the bottom track
                    gr.DrawLine(pens[id], opColumn, trackheight, opColumn, trackheight + trackheight);
                    gr.DrawLine(pens[id], opColumn, imageHt - trackheight, opColumn, imageHt);
                    opColumn++;
                }

                //gr.DrawLine(blackPen, opColumn - 1, imageHt - trackheight, opColumn - 1, imageHt - 10);
            }

            //FileInfo fi = new FileInfo(topLevelDirectory + name);
            //Console.WriteLine("Reading file: " + fi.Name);

            // The last cluster gets neither a separator line nor a label.
            if (id >= clusterCount - 1)
            {
                break;
            }

            gr.DrawLine(whitePen, opColumn - 1, 0, opColumn - 1, imageHt - trackheight - 1);
            gr.DrawLine(blackPen, opColumn - 1, imageHt - trackheight, opColumn - 1, imageHt);

            // centre the label horizontally within the columns of this cluster
            int location = opColumn - ((opColumn - clusterStartColumn) / 2);
            gr.DrawString(clusterLabel[sortID], stringFont, Brushes.Black, new PointF(location - 10, imageHt - 19));
        }

        ////Draw the title bar
        using (Image titleBar = DrawTitleBarOfClusterSpectrogram(title, imageWidth))
        {
            gr.DrawImage(titleBar, 0, 0);
        }

        ////Draw the x-axis time scale bar
        //int trackHeight = 20;
        //TimeSpan fullDuration = TimeSpan.FromTicks(indexCalculationDuration.Ticks * imageWidth);
        //Bitmap timeBmp = ImageTrack.DrawTimeTrack(fullDuration, TimeSpan.Zero, imageWidth, trackHeight);

        //spgmImage = LDSpectrogramRGB.FrameLDSpectrogram(spgmImage, titleBar, minuteOffset, indexCalculationDuration, xTicInterval, nyquist, herzInterval);
        //Graphics gr = Graphics.FromImage(spgmImage);
        ////gr.Clear(Color.Black);
        //gr.DrawImage(titleBar, 0, 0); //draw in the top spectrogram
        //gr.DrawImage(timeBmp, 0, 20); //draw in the top spectrogram
        //gr.DrawImage(timeBmp, 0, imageHeight - 20); //draw in the top spectrogram

        opImage.Save(Path.Combine(opDir, opFileName));
    }
}
/// <summary>
/// Runs the feature learning workflow on a hard-coded data set:
/// learns cluster centroids per frequency band from the one-minute recordings, writes the centroids and
/// cluster sizes to csv files, saves an image of the centroids for each band, and finally runs
/// feature extraction on the training set using all learned centroids.
/// </summary>
/// <param name="arguments">Not read by this method; all paths are hard coded.</param>
/// <exception cref="FileNotFoundException">The config file path could not be converted to a file info.</exception>
/// <exception cref="ArgumentException">The config file does not exist.</exception>
public void Execute(Arguments arguments)
{
    LoggedConsole.WriteLine("feature learning process...");

    var inputDir = @"D:\Mahnoosh\Liz\Least_Bittern\";
    var inputPath = Path.Combine(inputDir, "TrainSet\\one_min_recordings");
    var trainSetPath = Path.Combine(inputDir, "TrainSet\\train_data");

    // var testSetPath = Path.Combine(inputDir, "TestSet");
    var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml";
    var resultDir = Path.Combine(inputDir, "FeatureLearning");
    Directory.CreateDirectory(resultDir);

    // var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
    // var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
    // var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
    // var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
    // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");

    // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1-min recordings
    var configFile = configPath.ToFileInfo();

    if (configFile == null)
    {
        throw new FileNotFoundException("No config file argument provided");
    }
    else if (!configFile.Exists)
    {
        throw new ArgumentException($"Config file {configFile.FullName} not found");
    }

    var configuration = ConfigFile.Deserialize<FeatureLearningSettings>(configFile);

    // Integer division: the selected frequency bins are split evenly between the bands.
    int patchWidth = (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand;

    var clusteringOutputList = FeatureLearning.UnsupervisedFeatureLearning(configuration, inputPath);

    List<double[][]> allBandsCentroids = new List<double[][]>();
    for (int i = 0; i < clusteringOutputList.Count; i++)
    {
        var clusteringOutput = clusteringOutputList[i];

        // writing centroids to a csv file
        // note that Csv.WriteToCsv can't write data types like dictionary<int, double[]> (problems with arrays)
        // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv
        // it might be a better way to do this
        string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
        var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray();
        Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());

        // sorting clusters based on size and output it to a csv file
        Dictionary<int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
        int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);

        // Write cluster ID and size to a CSV file
        string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv");
        Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

        // Draw cluster image directly from clustering output
        List<KeyValuePair<int, double[]>> list = clusteringOutput.ClusterIdCentroid.ToList();
        double[][] centroids = new double[list.Count][];

        for (int j = 0; j < list.Count; j++)
        {
            centroids[j] = list[j].Value;
        }

        allBandsCentroids.Add(centroids);

        List<double[,]> allCentroids = new List<double[,]>();
        for (int k = 0; k < centroids.Length; k++)
        {
            // convert each centroid to a matrix in order of cluster ID
            // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight);
            // OR: in order of cluster size
            double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, configuration.PatchHeight);

            // normalize each centroid
            double[,] normCent = DataTools.normalise(cent);

            // add a row of zero to each centroid
            double[,] cent2 = PatchSampling.AddRow(normCent);

            allCentroids.Add(cent2);
        }

        // concatenate all centroids
        double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);

        // Draw clusters. The image is disposed as soon as it has been saved, so the loop
        // does not accumulate one undisposed image per frequency band.
        using (var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix))
        {
            clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
            var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");
            clusterImage.Save(outputClusteringImage);
        }
    }

    // extracting features
    FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir);
    LoggedConsole.WriteLine("Done...");
}
/// <summary>
/// Manual test scaffold for the file helper methods of this class.
/// It is disabled: the first statement always throws, so none of the tests below can currently run.
/// </summary>
private static void Main()
{
    // Guard against accidental use. Everything after this statement is unreachable
    // and is kept only as a template for manual testing.
    throw new NotSupportedException("THIS WILL FAIL IN PRODUCTION");

    Log.WriteLine("TESTING METHODS IN CLASS FileTools\n\n");

    // test ReadTextFile(string fName)
    bool runReadTextFileTest = false;
    if (runReadTextFileTest)
    {
        string textPath = testDir + "testTextFile.txt";
        var textLines = ReadTextFile(textPath);
        foreach (string textLine in textLines)
        {
            LoggedConsole.WriteLine(textLine);
        }
    }

    // test WriteTextFile(string fName)
    bool runWriteTextFileTest = false;
    if (runWriteTextFileTest)
    {
        string outputPath = testDir + "testOfWritingATextFile.txt";
        var content = new List<string>
        {
            "string1",
            "string2",
            "string3",
            "string4",
            "string5",
        };
        WriteTextFile(outputPath, content);
    }

    // test ReadDoubles2Matrix(string fName)
    bool runReadMatrixTest = false;
    if (runReadMatrixTest)
    {
        string matrixPath = testDir + "testOfReadingMatrixFile.txt";
        double[,] loaded = ReadDoubles2Matrix(matrixPath);
        int height = loaded.GetLength(0); //rows
        int width = loaded.GetLength(1); //columns

        //LoggedConsole.WriteLine("rowCount=" + rowCount + "  colCount=" + colCount);
        DataTools.writeMatrix(loaded);
    }

    // test WriteMatrix2File(matrix, fName) - the only test that is switched on
    bool runWriteMatrixTest = true;
    if (runWriteMatrixTest)
    {
        string matrixOutputPath = testDir + "testWriteOfMatrix2File.txt";
        double[,] sample =
        {
            { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6 },
            { 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 },
            { 0.9, 1.0, 1.1, 1.2, 1.3, 1.4 },
        };
        WriteMatrix2File(sample, matrixOutputPath);
        LoggedConsole.WriteLine("Wrote following matrix to file " + matrixOutputPath);
        DataTools.writeMatrix(sample);
    }

    //COPY THIS TEST TEMPLATE
    bool runTemplateTest = false;
    if (runTemplateTest)
    {
        // test Method(parameters)
    }

    Log.WriteLine("\nFINISHED");
    Log.WriteLine("CLOSE CONSOLE");
} //end MAIN
/// <summary>
/// Frames the signal and returns the following values encapsulated in class EnvelopeAndFft:
/// 1) the minimum and maximum signal values
/// 2) the average of absolute amplitudes for each frame
/// 3) the minimum value in each frame
/// 4) the maximum value in each frame
/// 5) the signal envelope as vector. i.e. the maximum of absolute amplitudes for each frame
/// 6) vector of frame energies, and the same values in decibels
/// 7) the high amplitude and clipping counts
/// 8) the signal amplitude spectrogram (with the DC column removed)
/// 9) the power of the FFT Window, i.e. sum of squared window values
/// 10) the nyquist
/// 11) the width of freq bin in Hz
/// 12) the Nyquist bin ID
/// AND OTHERS (duration, epsilon, sample rate, frame count, fraction of high energy frames).
/// The returned info is used by Sonogram classes to draw sonograms and by Spectral Indices classes to calculate Spectral indices.
/// Less than half the info is used to draw sonograms but it is difficult to disentangle calculation of all the info without
/// reverting back to the old days when we used two classes and making sure they remain in synch.
/// </summary>
/// <param name="signal">The signal samples.</param>
/// <param name="sampleRate">Samples per second; used for the duration and the frequency scale.</param>
/// <param name="epsilon">Passed to the clipping detector and returned unchanged.</param>
/// <param name="frameSize">Number of samples in a frame (the FFT window length).</param>
/// <param name="frameStep">Number of samples between the starts of consecutive frames.</param>
/// <param name="windowName">Name of the FFT window function; the Hamming window is used when null.</param>
/// <returns>The envelope, energy and spectrogram information described above.</returns>
/// <exception cref="NullReferenceException">The signal could not be divided into frames.</exception>
public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram(
    double[] signal,
    int sampleRate,
    double epsilon,
    int frameSize,
    int frameStep,
    string windowName = null)
{
    // SIGNAL PRE-EMPHASIS helps with speech signals
    // Do not use this for environmental audio
    //if (config.DoPreemphasis)
    //{
    //    signal = DSP_Filters.PreEmphasis(signal, 0.96);
    //}

    int[,] frameIDs = FrameStartEnds(signal.Length, frameSize, frameStep);
    if (frameIDs == null)
    {
        // The exception type is kept as is, because callers may depend on it.
        throw new NullReferenceException("Thrown in EnvelopeAndFft.ExtractEnvelopeAndAmplSpectrogram(): int matrix, frameIDs, cannot be null.");
    }

    int frameCount = frameIDs.GetLength(0);

    // set up the FFT parameters
    if (windowName == null)
    {
        windowName = FFT.KeyHammingWindow;
    }

    FFT.WindowFunc w = FFT.GetWindowFunction(windowName);
    var fft = new FFT(frameSize, w); // init class which calculates the Matlab compatible .NET FFT
    double[,] spectrogram = new double[frameCount, fft.CoeffCount]; // init amplitude sonogram
    double minSignalValue = double.MaxValue;
    double maxSignalValue = double.MinValue;

    double[] average = new double[frameCount];
    double[] minValues = new double[frameCount];
    double[] maxValues = new double[frameCount];
    double[] envelope = new double[frameCount];
    double[] frameEnergy = new double[frameCount];
    double[] frameDecibels = new double[frameCount];

    // for all frames
    for (int i = 0; i < frameCount; i++)
    {
        int start = i * frameStep;
        int end = start + frameSize;

        // The first sample of the frame seeds every per-frame statistic. It must also be counted in
        // the frame energy and in the global min/max; previously it was left out of both because
        // the loop below starts at the second sample.
        double firstSample = signal[start];
        double frameMin = firstSample;
        double frameMax = firstSample;
        double frameSum = firstSample;
        double total = Math.Abs(firstSample);
        double maxAbsValue = total;
        double energy = firstSample * firstSample;

        if (firstSample > maxSignalValue)
        {
            maxSignalValue = firstSample;
        }

        if (firstSample < minSignalValue)
        {
            minSignalValue = firstSample;
        }

        // for all remaining values in frame
        for (int x = start + 1; x < end; x++)
        {
            double sample = signal[x];

            if (sample > maxSignalValue)
            {
                maxSignalValue = sample;
            }

            if (sample < minSignalValue)
            {
                minSignalValue = sample;
            }

            frameSum += sample;

            // Get frame min and max
            if (sample < frameMin)
            {
                frameMin = sample;
            }

            if (sample > frameMax)
            {
                frameMax = sample;
            }

            energy += sample * sample;

            // Get absolute signal average in current frame
            double absValue = Math.Abs(sample);
            total += absValue;

            // Get the maximum absolute signal value in current frame
            if (absValue > maxAbsValue)
            {
                maxAbsValue = absValue;
            }
        } // end of frame

        double frameDc = frameSum / frameSize;
        minValues[i] = frameMin;
        maxValues[i] = frameMax;
        average[i] = total / frameSize;
        envelope[i] = maxAbsValue;
        frameEnergy[i] = energy / frameSize;
        frameDecibels[i] = 10 * Math.Log10(frameEnergy[i]);

        // remove DC value from signal values
        double[] signalMinusAv = new double[frameSize];
        for (int j = 0; j < frameSize; j++)
        {
            signalMinusAv[j] = signal[start + j] - frameDc;
        }

        // generate the spectra of FFT AMPLITUDES - NOTE: f[0]=DC
        var f1 = fft.InvokeDotNetFFT(signalMinusAv);

        // Previous alternative call to do the FFT and return amplitude spectrum
        //f1 = fft.Invoke(window);

        // Smooth spectrum to reduce variance
        // In the early days (pre-2010), we used to smooth the spectra to reduce sonogram variance. This is statistically correct thing to do.
        // Later, we stopped this for standard sonograms but kept it for calculating acoustic indices.
        // As of 28 March 2017, we are merging the two codes and keeping spectrum smoothing.
        // Will need to check the effect on spectrograms.
        int smoothingWindow = 3;
        f1 = DataTools.filterMovingAverage(f1, smoothingWindow);

        // transfer amplitude spectrum to spectrogram matrix
        for (int j = 0; j < fft.CoeffCount; j++)
        {
            spectrogram[i, j] = f1[j];
        }
    } // end frames

    // Remove the DC column ie column zero from amplitude spectrogram.
    double[,] amplitudeSpectrogram = MatrixTools.Submatrix(spectrogram, 0, 1, spectrogram.GetLength(0) - 1, spectrogram.GetLength(1) - 1);

    // check the envelope for clipping. Accept a clip if two consecutive frames have max value = 1,0
    Clipping.GetClippingCount(signal, envelope, frameStep, epsilon, out int highAmplitudeCount, out int clipCount);

    // get SNR data
    var snrData = new SNR(signal, frameIDs);

    return new EnvelopeAndFft
    {
        // The following data is required when constructing sonograms
        Duration = TimeSpan.FromSeconds((double)signal.Length / sampleRate),
        Epsilon = epsilon,
        SampleRate = sampleRate,
        FrameCount = frameCount,
        FractionOfHighEnergyFrames = snrData.FractionOfHighEnergyFrames,
        WindowPower = fft.WindowPower,
        AmplitudeSpectrogram = amplitudeSpectrogram,

        // The below 11 variables are only used when calculating spectral and summary indices
        // energy level information
        ClipCount = clipCount,
        HighAmplitudeCount = highAmplitudeCount,
        MinSignalValue = minSignalValue,
        MaxSignalValue = maxSignalValue,

        // envelope info
        Average = average,
        MinFrameValues = minValues,
        MaxFrameValues = maxValues,
        Envelope = envelope,
        FrameEnergy = frameEnergy,
        FrameDecibels = frameDecibels,

        // freq scale info
        NyquistFreq = sampleRate / 2,
        NyquistBin = amplitudeSpectrogram.GetLength(1) - 1,
        FreqBinWidth = sampleRate / (double)amplitudeSpectrogram.GetLength(1) / 2,
    };
}
/// <summary>
/// Writes the character <paramref name="value"/> to <paramref name="stringBuilder"/> by delegating to
/// <c>DataTools.ConvertCharToSql</c> with "'" as the string argument and <c>AppendConversionAction</c> as the
/// conversion callback.
/// </summary>
static void ConvertCharToSql(StringBuilder stringBuilder, char value)
    => DataTools.ConvertCharToSql(stringBuilder, "'", AppendConversionAction, value);
/// <summary>
/// This method does the work.
/// It averages the spectrogram intensity in the band minHz-maxHz for every frame and converts maxima of
/// that decibel track into acoustic events, which are then filtered by their spectral profile.
/// </summary>
/// <param name="audioRecording">the recording.</param>
/// <param name="configuration">the config file.</param>
/// <param name="profileName">name of the call/event type.</param>
/// <param name="segmentStartOffset">where one segment is located in the total recording.</param>
/// <returns>the events, the score plot and the spectrogram.</returns>
/// <exception cref="ArgumentException">The configuration contains no profile called <paramref name="profileName"/>.</exception>
private static RecognizerResults TerritorialCall(AudioRecording audioRecording, Config configuration, string profileName, TimeSpan segmentStartOffset)
{
    // Fail with a clear message when the profile is missing, rather than with a
    // NullReferenceException on the first parameter lookup below.
    ConfigFile.TryGetProfile(configuration, profileName, out var profile);
    if (profile == null)
    {
        throw new ArgumentException($"The configuration does not contain a profile named '{profileName}'.", nameof(profileName));
    }

    // get the common properties
    string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "Pteropus species";
    string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "Pteropus";

    // The following parameters worked well on a ten minute recording containing 14-16 calls.
    // Note: if you lower the dB threshold, you need to increase maxDurationSeconds
    int minHz = profile.GetIntOrNull(AnalysisKeys.MinHz) ?? 800;
    int maxHz = profile.GetIntOrNull(AnalysisKeys.MaxHz) ?? 8000;
    double minDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MinDuration) ?? 0.15;
    double maxDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MaxDuration) ?? 0.5;
    double decibelThreshold = profile.GetDoubleOrNull(AnalysisKeys.DecibelThreshold) ?? 9.0;
    var minTimeSpan = TimeSpan.FromSeconds(minDurationSeconds);
    var maxTimeSpan = TimeSpan.FromSeconds(maxDurationSeconds);

    //######################
    //2. Convert each segment to a spectrogram.
    var sonogram = GetSonogram(configuration, audioRecording);
    var decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, minHz, maxHz, sonogram.NyquistFrequency);

    // prepare plots
    // Scores are normalised so that three times the decibel threshold maps to 1.0;
    // the event threshold therefore sits at one third of the plot height.
    double intensityNormalisationMax = 3 * decibelThreshold;
    var eventThreshold = decibelThreshold / intensityNormalisationMax;
    var normalisedIntensityArray = DataTools.NormaliseInZeroOne(decibelArray, 0, intensityNormalisationMax);
    var plot = new Plot(speciesName + " Territory", normalisedIntensityArray, eventThreshold);
    var plots = new List<Plot> { plot };

    //iii: CONVERT decibel SCORES TO ACOUSTIC EVENTS
    var acousticEvents = AcousticEvent.GetEventsAroundMaxima(
        decibelArray,
        segmentStartOffset,
        minHz,
        maxHz,
        decibelThreshold,
        minTimeSpan,
        maxTimeSpan,
        sonogram.FramesPerSecond,
        sonogram.FBinWidth);

    //iV add additional info to the acoustic events
    acousticEvents.ForEach(ae =>
    {
        ae.FileName = audioRecording.BaseName;
        ae.SpeciesName = speciesName;
        ae.Name = abbreviatedSpeciesName + profileName;
        ae.Profile = profileName;
        ae.SegmentDurationSeconds = audioRecording.Duration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
    });

    acousticEvents = FilterEventsForSpectralProfile(acousticEvents, sonogram);

    return new RecognizerResults()
    {
        Events = acousticEvents,
        Hits = null,
        ScoreTrack = null,
        Plots = plots,
        Sonogram = sonogram,
    };
}
/// <summary>
/// Matrix overload: flattens <paramref name="matrix"/> into a one-dimensional array with
/// <c>DataTools.Matrix2Array</c> and hands the result to the array overload of this method.
/// </summary>
/// <param name="matrix">the values to analyse.</param>
/// <param name="binCount">forwarded unchanged to the array overload.</param>
/// <param name="upperPercentile">forwarded unchanged to the array overload.</param>
/// <returns>whatever the array overload returns for the flattened values.</returns>
public static SpectralStats GetModeAndOneTailedStandardDeviation(double[,] matrix, int binCount, int upperPercentile)
    => GetModeAndOneTailedStandardDeviation(DataTools.Matrix2Array(matrix), binCount, upperPercentile);
}//Execute

/// <summary>
/// Searches the given segments of a spectrogram for frames that resemble the target.
/// The target and every target-length window of the spectrogram are set to the same dynamic range,
/// passed through Sobel edge detection, flattened and normalised to unit length;
/// the score of a frame is the dot product of the two resulting vectors.
/// </summary>
/// <param name="target">the template; its first dimension sets the window length in frames.</param>
/// <param name="dynamicRange">dynamic range applied to the target and to every window via <c>SNR.SetDynamicRange</c>.</param>
/// <param name="sonogram">the spectrogram to search.</param>
/// <param name="segments">events whose start and end times bound the frames to be searched.</param>
/// <param name="minHz">lower bound of the frequency band; converted to a bin index using <c>sonogram.FBinWidth</c>.</param>
/// <param name="maxHz">upper bound of the frequency band; converted to a bin index using <c>sonogram.FBinWidth</c>.</param>
/// <param name="minDuration">not used by this method.</param>
/// <returns>
/// a one-element tuple holding a score array with one entry per spectrogram frame
/// (frames that were not searched keep the value zero), or null when <paramref name="segments"/> is null.
/// </returns>
public static Tuple<double[]> Execute_SobelEdges(double[,] target, double dynamicRange, SpectrogramStandard sonogram, List<AcousticEvent> segments, int minHz, int maxHz, double minDuration)
{
    Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
    if (segments == null)
    {
        return null;
    }

    int minBin = (int)(minHz / sonogram.FBinWidth);
    int maxBin = (int)(maxHz / sonogram.FBinWidth);
    int targetLength = target.GetLength(0);
    int frameCount = sonogram.FrameCount;

    // adjust target's dynamic range to that set by user, then reduce it to a unit-length edge vector
    target = SNR.SetDynamicRange(target, 3.0, dynamicRange);
    double[,] edgeTarget = ImageTools.SobelEdgeDetection(target, 0.4);
    double[] v1 = DataTools.Matrix2Array(edgeTarget);
    v1 = DataTools.normalise2UnitLength(v1);

    double[] scores = new double[frameCount];
    foreach (AcousticEvent av in segments)
    {
        Log.WriteLine("SEARCHING SEGMENT.");
        int startRow = Math.Max(0, (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond));
        int endRow = Math.Min(frameCount, (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond));

        // A window anchored at startRow must fit inside the spectrogram. Without this check the
        // "minimum of one row" rule below would index scores (and the spectrogram rows, assuming
        // sonogram.Data has FrameCount rows) past their end for segments close to the end of the recording.
        if (startRow + targetLength > frameCount)
        {
            continue;
        }

        int stopRow = endRow - targetLength;
        if (stopRow <= startRow)
        {
            stopRow = startRow + 1; //want minimum of one row
        }

        for (int r = startRow; r < stopRow; r++)
        {
            double[,] matrix = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
            matrix = SNR.SetDynamicRange(matrix, 3.0, dynamicRange); //same dynamic range as the target
            double[,] edgeMatrix = ImageTools.SobelEdgeDetection(matrix, 0.4);

            double[] v2 = DataTools.Matrix2Array(edgeMatrix);
            v2 = DataTools.normalise2UnitLength(v2);
            scores[r] = DataTools.DotProduct(v1, v2);
        } //end of rows in segment

        // fill in end of segment: frames too close to the segment end to anchor a window repeat the last score
        double lastScore = scores[stopRow - 1];
        for (int r = stopRow; r < endRow; r++)
        {
            scores[r] = lastScore;
        }
    } //foreach (AcousticEvent av in segments)

    return Tuple.Create(scores);
}//Execute