public double[] GetLogPsd() { var psd = MatrixTools.GetColumnAverages(this.Data); var logPsd = DataTools.LogValues(psd); return(logPsd); }
public void GetPsd(string path) { var psd = MatrixTools.GetColumnAverages(this.Data); FileTools.WriteArray2File(psd, path + ".csv"); GraphsAndCharts.DrawGraph(psd, "Title", new FileInfo(path)); //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path)); //image.Save(path); }
public void DrawLogPsd(string path) { var psd = MatrixTools.GetColumnAverages(this.Data); var logPsd = DataTools.LogValues(psd); FileTools.WriteArray2File(logPsd, path + ".csv"); GraphsAndCharts.DrawGraph(logPsd, "log PSD", new FileInfo(path)); //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path)); //image.Save(path, ImageFormat.Png); }
public static Dictionary <string, double[]> AverageIndicesOverMinutes(Dictionary <string, double[, ]> allIndices, int startRowId, int endRowId) { var opIndices = new Dictionary <string, double[]>(); var keys = allIndices.Keys; foreach (string key in keys) { var success = allIndices.TryGetValue(key, out double[,] matrix); if (success) { var colCount = matrix.GetLength(1); var subMatrix = MatrixTools.Submatrix(matrix, startRowId, 0, endRowId, colCount - 1); opIndices.Add(key, MatrixTools.GetColumnAverages(subMatrix)); } } return(opIndices); }
/// <summary> /// THE KEY ANALYSIS METHOD /// </summary> /// <param name="recording"> /// The segment Of Source File. /// </param> /// <param name="configDict"> /// The config Dict. /// </param> /// <param name="value"></param> /// <returns> /// The <see cref="LimnodynastesConvexResults"/>. /// </returns> internal static LimnodynastesConvexResults Analysis( Dictionary <string, double[, ]> dictionaryOfHiResSpectralIndices, AudioRecording recording, Dictionary <string, string> configDict, AnalysisSettings analysisSettings, SegmentSettingsBase segmentSettings) { // for Limnodynastes convex, in the D.Stewart CD, there are peaks close to: //1. 1950 Hz //2. 1460 hz //3. 970 hz These are 490 Hz apart. // for Limnodynastes convex, in the JCU recording, there are peaks close to: //1. 1780 Hz //2. 1330 hz //3. 880 hz These are 450 Hz apart. // So strategy is to look for three peaks separated by same amount and in the vicinity of the above, // starting with highest power (the top peak) and working down to lowest power (bottom peak). var outputDir = segmentSettings.SegmentOutputDirectory; TimeSpan segmentStartOffset = segmentSettings.SegmentStartOffset; //KeyValuePair<string, double[,]> kvp = dictionaryOfHiResSpectralIndices.First(); var spg = dictionaryOfHiResSpectralIndices["RHZ"]; int rhzRowCount = spg.GetLength(0); int rhzColCount = spg.GetLength(1); int sampleRate = recording.SampleRate; double herzPerBin = sampleRate / 2 / (double)rhzRowCount; double scoreThreshold = (double?)double.Parse(configDict["EventThreshold"]) ?? 3.0; int minimumFrequency = (int?)int.Parse(configDict["MinHz"]) ?? 850; int dominantFrequency = (int?)int.Parse(configDict["DominantFrequency"]) ?? 1850; // # The Limnodynastes call has three major peaks. The dominant peak is at 1850 or as set above. // # The second and third peak are at equal gaps below. DominantFreq-gap and DominantFreq-(2*gap); // # Set the gap in the Config file. Should typically be in range 880 to 970 int peakGapInHerz = (int?)int.Parse(configDict["PeakGap"]) ?? 470; int F1AndF2Gap = (int)Math.Round(peakGapInHerz / herzPerBin); //int F1AndF2Gap = 10; // 10 = number of freq bins int F1AndF3Gap = 2 * F1AndF2Gap; //int F1AndF3Gap = 20; int hzBuffer = 250; int bottomBin = 5; int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin); int binBuffer = (int)Math.Round(hzBuffer / herzPerBin);; int dominantBinMin = dominantBin - binBuffer; int dominantBinMax = dominantBin + binBuffer; // freqBin + rowID = binCount - 1; // therefore: rowID = binCount - freqBin - 1; int minRowID = rhzRowCount - dominantBinMax - 1; int maxRowID = rhzRowCount - dominantBinMin - 1; int bottomRow = rhzRowCount - bottomBin - 1; var list = new List <Point>(); // loop through all spectra/columns of the hi-res spectrogram. for (int c = 1; c < rhzColCount - 1; c++) { double maxAmplitude = -double.MaxValue; int idOfRowWithMaxAmplitude = 0; for (int r = minRowID; r <= bottomRow; r++) { if (spg[r, c] > maxAmplitude) { maxAmplitude = spg[r, c]; idOfRowWithMaxAmplitude = r; } } if (idOfRowWithMaxAmplitude < minRowID) { continue; } if (idOfRowWithMaxAmplitude > maxRowID) { continue; } // want a spectral peak. if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c - 1]) { continue; } if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c + 1]) { continue; } // peak should exceed thresold amplitude if (spg[idOfRowWithMaxAmplitude, c] < 3.0) { continue; } // convert row ID to freq bin ID int freqBinID = rhzRowCount - idOfRowWithMaxAmplitude - 1; list.Add(new Point(c, freqBinID)); // we now have a list of potential hits for LimCon. This needs to be filtered. // Console.WriteLine("Col {0}, Bin {1} ", c, freqBinID); } // DEBUG ONLY // ################################ TEMPORARY ################################ // superimpose point on RHZ HiRes spectrogram for debug purposes bool drawOnHiResSpectrogram = true; //string filePath = @"G:\SensorNetworks\Output\Frogs\TestOfHiResIndices-2016July\Test\Towsey.HiResIndices\SpectrogramImages\3mile_creek_dam_-_Herveys_Range_1076_248366_20130305_001700_30_0min.CombinedGreyScale.png"; var fileName = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name); string filePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScale.png"; var debugImage = new FileInfo(filePath); if (!debugImage.Exists) { drawOnHiResSpectrogram = false; } if (drawOnHiResSpectrogram) { // put red dot where max is Bitmap bmp = new Bitmap(filePath); foreach (Point point in list) { bmp.SetPixel(point.X + 70, 1911 - point.Y, Color.Red); } // mark off every tenth frequency bin for (int r = 0; r < 26; r++) { bmp.SetPixel(68, 1911 - (r * 10), Color.Blue); bmp.SetPixel(69, 1911 - (r * 10), Color.Blue); } // mark off upper bound and lower frequency bound bmp.SetPixel(69, 1911 - dominantBinMin, Color.Lime); bmp.SetPixel(69, 1911 - dominantBinMax, Color.Lime); //bmp.SetPixel(69, 1911 - maxRowID, Color.Lime); string opFilePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScaleAnnotated.png"; bmp.Save(opFilePath); } // END DEBUG ################################ TEMPORARY ################################ // now construct the standard decibel spectrogram WITHOUT noise removal, and look for LimConvex // get frame parameters for the analysis double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1); int frameSize = rhzRowCount * 2; int frameStep = frameSize; // this default = zero overlap double frameDurationInSeconds = frameSize / (double)sampleRate; double frameStepInSeconds = frameStep / (double)sampleRate; double framesPerSec = 1 / frameStepInSeconds; //var dspOutput = DSP_Frames.ExtractEnvelopeAndFFTs(recording, frameSize, frameStep); //// Generate deciBel spectrogram //double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.amplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon); // i: Init SONOGRAM config var sonoConfig = new SonogramConfig { SourceFName = recording.BaseName, WindowSize = frameSize, WindowOverlap = 0.0, NoiseReductionType = NoiseReductionType.None, }; // init sonogram BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); // remove the DC row of the spectrogram sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1); //scores.Add(new Plot("Decibels", DataTools.NormaliseMatrixValues(dBArray), ActivityAndCover.DefaultActivityThresholdDb)); //scores.Add(new Plot("Active Frames", DataTools.Bool2Binary(activity.activeFrames), 0.0)); // convert spectral peaks to frequency //var tuple_DecibelPeaks = SpectrogramTools.HistogramOfSpectralPeaks(deciBelSpectrogram); //int[] peaksBins = tuple_DecibelPeaks.Item2; //double[] freqPeaks = new double[peaksBins.Length]; //int binCount = sonogram.Data.GetLength(1); //for (int i = 1; i < peaksBins.Length; i++) freqPeaks[i] = (lowerBinBound + peaksBins[i]) / (double)nyquistBin; //scores.Add(new Plot("Max Frequency", freqPeaks, 0.0)); // location of peaks for spectral images // create new list of LimCon hits in the standard spectrogram. double timeSpanOfFrameInSeconds = frameSize / (double)sampleRate; var newList = new List <int[]>(); int lastFrameID = sonogram.Data.GetLength(0) - 1; int lastBinID = sonogram.Data.GetLength(1) - 1; foreach (Point point in list) { double secondsFromStartOfSegment = (point.X * 0.1) + 0.05; // convert point.Y to center of time-block. int framesFromStartOfSegment = (int)Math.Round(secondsFromStartOfSegment / timeSpanOfFrameInSeconds); // location of max point is uncertain, so search in neighbourhood. // NOTE: sonogram.data matrix is time*freqBin double maxValue = -double.MaxValue; int idOfTMax = framesFromStartOfSegment; int idOfFMax = point.Y; for (int deltaT = -4; deltaT <= 4; deltaT++) { for (int deltaF = -1; deltaF <= 1; deltaF++) { int newT = framesFromStartOfSegment + deltaT; if (newT < 0) { newT = 0; } else if (newT > lastFrameID) { newT = lastFrameID; } double value = sonogram.Data[newT, point.Y + deltaF]; if (value > maxValue) { maxValue = value; idOfTMax = framesFromStartOfSegment + deltaT; idOfFMax = point.Y + deltaF; } } } // newList.Add(new Point(frameSpan, point.Y)); int[] array = new int[2]; array[0] = idOfTMax; array[1] = idOfFMax; newList.Add(array); } // Now obtain more of spectrogram to see if have peaks at two other places characteristic of Limnodynastes convex. // In the D.Stewart CD, there are peaks close to: //1. 1950 Hz //2. 1460 hz //3. 970 hz These are 490 Hz apart. // For Limnodynastes convex, in the JCU recording, there are peaks close to: //1. 1780 Hz //2. 1330 hz //3. 880 hz These are 450 Hz apart. // So strategy is to look for three peaks separated by same amount and in the vicinity of the above, // starting with highest power (the top peak) and working down to lowest power (bottom peak). //We have found top/highest peak - now find the other two. int secondDominantFrequency = 1380; int secondDominantBin = (int)Math.Round(secondDominantFrequency / herzPerBin); int thirdDominantFrequency = 900; int thirdDominantBin = (int)Math.Round(thirdDominantFrequency / herzPerBin); var acousticEvents = new List <AcousticEvent>(); int Tbuffer = 2; // First extract a sub-matrix. foreach (int[] array in newList) { // NOTE: sonogram.data matrix is time*freqBin int Tframe = array[0]; int F1bin = array[1]; double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - Tbuffer, 0, Tframe + Tbuffer, F1bin); double F1power = subMatrix[Tbuffer, F1bin]; // convert to vector var spectrum = MatrixTools.GetColumnAverages(subMatrix); // use the following code to get estimate of background noise double[,] powerMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - 3, 10, Tframe + 3, F1bin); double averagePower = (MatrixTools.GetRowAverages(powerMatrix)).Average(); double score = F1power - averagePower; // debug - checking what the spectrum looks like. //for (int i = 0; i < 18; i++) // spectrum[i] = -100.0; //DataTools.writeBarGraph(spectrum); // locate the peaks in lower frequency bands, F2 and F3 bool[] peaks = DataTools.GetPeaks(spectrum); int F2bin = 0; double F2power = -200.0; // dB for (int i = -3; i <= 2; i++) { int bin = F1bin - F1AndF2Gap + i; if ((peaks[bin]) && (F2power < subMatrix[1, bin])) { F2bin = bin; F2power = subMatrix[1, bin]; } } if (F2bin == 0) { continue; } if (F2power == -200.0) { continue; } score += (F2power - averagePower); int F3bin = 0; double F3power = -200.0; for (int i = -5; i <= 2; i++) { int bin = F1bin - F1AndF3Gap + i; if ((peaks[bin]) && (F3power < subMatrix[1, bin])) { F3bin = bin; F3power = subMatrix[1, bin]; } } if (F3bin == 0) { continue; } if (F3power == -200.0) { continue; } score += (F3power - averagePower); score /= 3; // ignore events where SNR < decibel threshold if (score < scoreThreshold) { continue; } // ignore events with wrong power distribution. A good LimnoConvex call has strongest F1 power if ((F3power > F1power) || (F2power > F1power)) { continue; } //freq Bin ID must be converted back to Matrix row ID // freqBin + rowID = binCount - 1; // therefore: rowID = binCount - freqBin - 1; minRowID = rhzRowCount - F1bin - 2; maxRowID = rhzRowCount - F3bin - 1; int F1RowID = rhzRowCount - F1bin - 1; int F2RowID = rhzRowCount - F2bin - 1; int F3RowID = rhzRowCount - F3bin - 1; int maxfreq = dominantFrequency + hzBuffer; int topBin = (int)Math.Round(maxfreq / herzPerBin); int frameCount = 4; double duration = frameCount * frameStepInSeconds; double startTimeWrtSegment = (Tframe - 2) * frameStepInSeconds; // Got to here so start initialising an acoustic event var ae = new AcousticEvent(segmentStartOffset, startTimeWrtSegment, duration, minimumFrequency, maxfreq); ae.SetTimeAndFreqScales(framesPerSec, herzPerBin); //var ae = new AcousticEvent(oblong, recording.Nyquist, binCount, frameDurationInSeconds, frameStepInSeconds, frameCount); //ae.StartOffset = TimeSpan.FromSeconds(Tframe * frameStepInSeconds); var pointF1 = new Point(2, topBin - F1bin); var pointF2 = new Point(2, topBin - F2bin); var pointF3 = new Point(2, topBin - F3bin); ae.Points = new List <Point>(); ae.Points.Add(pointF1); ae.Points.Add(pointF2); ae.Points.Add(pointF3); //tried using HitElements but did not do what I wanted later on. //ae.HitElements = new HashSet<Point>(); //ae.HitElements = new SortedSet<Point>(); //ae.HitElements.Add(pointF1); //ae.HitElements.Add(pointF2); //ae.HitElements.Add(pointF3); ae.Score = score; //ae.MinFreq = Math.Round((topBin - F3bin - 5) * herzPerBin); //ae.MaxFreq = Math.Round(topBin * herzPerBin); acousticEvents.Add(ae); } // now add in extra common info to the acoustic events acousticEvents.ForEach(ae => { ae.SpeciesName = configDict[AnalysisKeys.SpeciesName]; ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds; ae.SegmentDurationSeconds = recording.Duration.TotalSeconds; ae.Name = abbreviatedName; ae.BorderColour = Color.Red; ae.FileName = recording.BaseName; }); double[] scores = new double[rhzColCount]; // predefinition of score array double nomalisationConstant = scoreThreshold * 4; // four times the score threshold double compressionFactor = rhzColCount / (double)sonogram.Data.GetLength(0); foreach (AcousticEvent ae in acousticEvents) { ae.ScoreNormalised = ae.Score / nomalisationConstant; if (ae.ScoreNormalised > 1.0) { ae.ScoreNormalised = 1.0; } int frameID = (int)Math.Round(ae.EventStartSeconds / frameDurationInSeconds); int hiresFrameID = (int)Math.Floor(frameID * compressionFactor); scores[hiresFrameID] = ae.ScoreNormalised; } var plot = new Plot(AnalysisName, scores, scoreThreshold); // DEBUG ONLY ################################ TEMPORARY ################################ // Draw a standard spectrogram and mark of hites etc. bool createStandardDebugSpectrogram = true; var imageDir = new DirectoryInfo(outputDir.FullName + @"\SpectrogramImages"); if (!imageDir.Exists) { imageDir.Create(); } if (createStandardDebugSpectrogram) { var fileName2 = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name); string filePath2 = Path.Combine(imageDir.FullName, fileName + ".Spectrogram.png"); Bitmap sonoBmp = (Bitmap)sonogram.GetImage(); int height = sonoBmp.Height; foreach (AcousticEvent ae in acousticEvents) { ae.DrawEvent(sonoBmp); //g.DrawRectangle(pen, ob.ColumnLeft, ob.RowTop, ob.ColWidth-1, ob.RowWidth); //ae.DrawPoint(sonoBmp, ae.HitElements.[0], Color.OrangeRed); //ae.DrawPoint(sonoBmp, ae.HitElements[1], Color.Yellow); //ae.DrawPoint(sonoBmp, ae.HitElements[2], Color.Green); ae.DrawPoint(sonoBmp, ae.Points[0], Color.OrangeRed); ae.DrawPoint(sonoBmp, ae.Points[1], Color.Yellow); ae.DrawPoint(sonoBmp, ae.Points[2], Color.LimeGreen); } // draw the original hits on the standard sonogram foreach (int[] array in newList) { sonoBmp.SetPixel(array[0], height - array[1], Color.Cyan); } // mark off every tenth frequency bin on the standard sonogram for (int r = 0; r < 20; r++) { sonoBmp.SetPixel(0, height - (r * 10) - 1, Color.Blue); sonoBmp.SetPixel(1, height - (r * 10) - 1, Color.Blue); } // mark off upper bound and lower frequency bound sonoBmp.SetPixel(0, height - dominantBinMin, Color.Lime); sonoBmp.SetPixel(0, height - dominantBinMax, Color.Lime); sonoBmp.Save(filePath2); } // END DEBUG ################################ TEMPORARY ################################ return(new LimnodynastesConvexResults { Sonogram = sonogram, Hits = null, Plot = plot, Events = acousticEvents, RecordingDuration = recording.Duration, }); } // Analysis()
/// <summary> /// Calculate summary statistics for supplied temporal and spectral targets. /// </summary> /// <remarks> /// The acoustic statistics calculated in this method are based on methods outlined in /// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach", /// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al. /// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424 /// .. /// The Briggs feature are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram. /// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count. /// We use entropy instead because value not dependent on row or column count because it is normalized. /// For the following meausres of k-central moments, the freq and time values are normalized in 0,1 to width of the event. /// 2. freq-mean /// 3. freq-variance /// 4. freq-skew and kurtosis /// 5. time-mean /// 6. time-variance /// 7. time-skew and kurtosis /// 8. freq-max (normalized) /// 9. time-max (normalized) /// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could. /// ... /// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels /// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum. /// NOTE 2: To NormaliseMatrixValues for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels. /// There are advantages and disadvantages to Briggs method versus ours. In our case, we hve to convert decibel values back to /// energy values when calculating the statistics for the extracted acoustic event. /// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis. /// Ony mean and standard deviation. /// .. /// NOTE 4: This method assumes that the passed event occurs totally within the passed recording, /// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile /// BUT not so long as to cause memory constipation. /// </remarks> /// <param name="recording">as type AudioRecording which contains the event</param> /// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording</param> /// <param name="spectralTarget">both bottom and top bounds in Hertz</param> /// <param name="config">parameters that determine the outcome of the analysis</param> /// <param name="segmentStartOffset">How long since the start of the recording this event occurred</param> /// <returns>an instance of EventStatistics</returns> public static EventStatistics AnalyzeAudioEvent( AudioRecording recording, Range <TimeSpan> temporalTarget, Range <double> spectralTarget, EventStatisticsConfiguration config, TimeSpan segmentStartOffset) { var stats = new EventStatistics { EventStartSeconds = temporalTarget.Minimum.TotalSeconds, EventEndSeconds = temporalTarget.Maximum.TotalSeconds, LowFrequencyHertz = spectralTarget.Minimum, HighFrequencyHertz = spectralTarget.Maximum, SegmentDurationSeconds = recording.Duration.TotalSeconds, SegmentStartSeconds = segmentStartOffset.TotalSeconds, }; // temporal target is supplied relative to recording, but not the supplied audio segment // shift coordinates relative to segment var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset); if (!recording .Duration .AsRangeFromZero(Topology.Inclusive) .Contains(localTemporalTarget)) { stats.Error = true; stats.ErrorMessage = $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})"; return(stats); } // convert recording to spectrogram int sampleRate = recording.SampleRate; double epsilon = recording.Epsilon; // extract the spectrogram var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep); double hertzBinWidth = dspOutput1.FreqBinWidth; var stepDurationInSeconds = config.FrameStep / (double)sampleRate; var startFrame = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds); // subtract 1 frame because want to end before start of end point. var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1; var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth); var topBin = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth); // Events can have their high value set to the nyquist. // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in // these cases. So we just ask for the bin below. if (topBin >= config.FrameSize / 2) { topBin = (config.FrameSize / 2) - 1; } // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon); double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram); decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn); decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0); // extract the required acoustic event var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin); // Get the SNR of the event. This is just the max value in the matrix because noise reduced MatrixTools.MinMax(eventMatrix, out _, out double max); stats.SnrDecibels = max; // Now need to convert event matrix back to energy values before calculating other statistics eventMatrix = MatrixTools.Decibels2Power(eventMatrix); var columnAverages = MatrixTools.GetColumnAverages(eventMatrix); var rowAverages = MatrixTools.GetRowAverages(eventMatrix); // calculate the mean and temporal standard deviation in decibels NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev); stats.MeanDecibels = 10 * Math.Log10(mean); stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev); // calculate the frequency standard deviation in decibels NormalDist.AverageAndSD(columnAverages, out mean, out stddev); stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev); // calculate relative location of the temporal maximum int maxRowId = DataTools.GetMaxIndex(rowAverages); stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length; // calculate the entropy dispersion/concentration indices stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages); stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages); // calculate the spectral centroid and the dominant frequency double binCentroid = CalculateSpectralCentroid(columnAverages); stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum; int maxColumnId = DataTools.GetMaxIndex(columnAverages); stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum; // remainder of this method is to produce debugging images. Can comment out when not debugging. /* * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages); * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100); * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png"; * image4.Save(path4); * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages); * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100); * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png"; * image4.Save(path4); */ return(stats); }
/// <summary> /// Remove events whose acoustic profile does not match that of a flying fox. /// </summary> /// <param name="events">unfiltered acoustic events.</param> /// <param name="sonogram">includes matrix of spectrogram values.</param> /// <returns>filtered acoustic events.</returns> private static List <AcousticEvent> FilterEventsForSpectralProfile(List <AcousticEvent> events, BaseSonogram sonogram) { double[,] spectrogramData = sonogram.Data; //int colCount = spectrogramData.GetLength(1); // The following freq bins are used to demarcate freq bands for spectral tests below. // The hertz values are hard coded but could be included in the config.yml file. int maxBin = (int)Math.Round(8000 / sonogram.FBinWidth); int fourKiloHzBin = (int)Math.Round(4000 / sonogram.FBinWidth); int oneKiloHzBin = (int)Math.Round(1000 / sonogram.FBinWidth); var filteredEvents = new List <AcousticEvent>(); foreach (AcousticEvent ae in events) { int startFrame = ae.Oblong.RowTop; //int endFrame = ae.Oblong.RowBottom; // get all the frames of the acoustic event //var subMatrix = DataTools.Submatrix(spectrogramData, startFrame, 0, endFrame, colCount - 1); // get only the frames from centre of the acoustic event var subMatrix = DataTools.Submatrix(spectrogramData, startFrame + 1, 0, startFrame + 4, maxBin); var spectrum = MatrixTools.GetColumnAverages(subMatrix); var normalisedSpectrum = DataTools.normalise(spectrum); normalisedSpectrum = DataTools.filterMovingAverageOdd(normalisedSpectrum, 11); var maxId = DataTools.GetMaxIndex(normalisedSpectrum); //var hzMax = (int)Math.Ceiling(maxId * sonogram.FBinWidth); // Do TESTS to determine if event has spectrum matching a Flying fox. // Test 1: Spectral maximum should be below 4 kHz. bool passTest1 = maxId < fourKiloHzBin; // Test 2: There should be little energy in 0-1 kHz band. var subband1Khz = DataTools.Subarray(normalisedSpectrum, 0, oneKiloHzBin); double bandArea1 = subband1Khz.Sum(); double energyRatio1 = bandArea1 / normalisedSpectrum.Sum(); // 0.125 = 1/8. i.e. test requires that energy in 0-1kHz band is less than average in all 8 kHz bands // 0.0938 = 3/32. i.e. test requires that energy in 0-1kHz band is less than 3/4 average in all 8 kHz bands // 0.0625 = 1/16. i.e. test requires that energy in 0-1kHz band is less than half average in all 8 kHz bands bool passTest2 = !(energyRatio1 > 0.1); // Test 3: There should be little energy in 4-5 kHz band. var subband4Khz = DataTools.Subarray(normalisedSpectrum, fourKiloHzBin, oneKiloHzBin); double bandArea2 = subband4Khz.Sum(); double energyRatio2 = bandArea2 / normalisedSpectrum.Sum(); bool passTest3 = !(energyRatio2 > 0.125); // TODO write method to determine similarity of spectrum to a true flying fox spectrum. // Problem: it is not certain how variable the FF spectra are. // In ten minutes of recording used so far, which include 14-15 obvious calls, there appear to be two spectral types. // One type has three peaks at around 1.5 kHz, 3 kHz and 6 kHz. // The other type have two peaks around 2.5 and 5.5 kHz. //if (passTest1) //if (true) if (passTest1 && passTest2 && passTest3) { filteredEvents.Add(ae); //DEBUG SPECTRAL PROFILES: UNCOMMENT following lines to get spectral profiles of the events. /* * double startSecond = ae.EventStartSeconds - ae.SegmentStartSeconds; * string name = "CallSpectrum " + (ae.SegmentStartSeconds / 60) + "m" + (int)Math.Floor(startSecond) + "s hzMax" + hzMax; * var bmp2 = GraphsAndCharts.DrawGraph(name, normalisedSpectrum, 100); * bmp2.Save(Path.Combine(@"PATH\Towsey.PteropusSpecies", name + ".png")); */ } } return(filteredEvents); }
public static void SimilarityIndex(double[] channelL, double[] channelR, double epsilon, int sampleRate, out double similarityIndex, out double decibelIndex, out double avDecibelBias, out double medianDecibelBias, out double lowFreqDbBias, out double midFreqDbBias, out double hiFreqDbBias) { //var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep); int frameSize = 512; int frameStep = 512; frameSize *= 16; // take longer window to get low freq frameStep *= 16; var dspOutputL = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep); var avSpectrumL = MatrixTools.GetColumnAverages(dspOutputL.AmplitudeSpectrogram); //var medianSpectrumL = MatrixTools.GetColumnMedians(dspOutputL.amplitudeSpectrogram); var dspOutputR = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep); var avSpectrumR = MatrixTools.GetColumnAverages(dspOutputR.AmplitudeSpectrogram); //var medianSpectrumR = MatrixTools.GetColumnMedians(dspOutputR.amplitudeSpectrogram); similarityIndex = 0.0; decibelIndex = 0.0; for (int i = 0; i < avSpectrumR.Length; i++) { double min = Math.Min(avSpectrumL[i], avSpectrumR[i]); double max = Math.Max(avSpectrumL[i], avSpectrumR[i]); if (max <= 0.000001) { max = 0.000001; // to prevent division by zero. } // index = min / max; double index = min * min / (max * max); similarityIndex += index; double dBmin = 20 * Math.Log10(min); double dBmax = 20 * Math.Log10(max); decibelIndex += dBmax - dBmin; } similarityIndex /= avSpectrumR.Length; decibelIndex /= avSpectrumR.Length; double medianLeft = Statistics.GetMedian(avSpectrumL); double medianRight = Statistics.GetMedian(avSpectrumR); medianDecibelBias = medianLeft - medianRight; // init values avDecibelBias = 0.0; lowFreqDbBias = 0.0; // calculate the freq band bounds for 2kHz and 7khz. int lowBound = frameSize * 2000 / sampleRate; int midBound = frameSize * 7000 / sampleRate; for (int i = 0; i < lowBound; i++) { double dbLeft = 20 * Math.Log10(avSpectrumL[i]); double dbRight = 20 * Math.Log10(avSpectrumR[i]); avDecibelBias += dbLeft - dbRight; lowFreqDbBias += dbLeft - dbRight; } midFreqDbBias = 0.0; for (int i = lowBound; i < midBound; i++) { double dbLeft = 20 * Math.Log10(avSpectrumL[i]); double dbRight = 20 * Math.Log10(avSpectrumR[i]); avDecibelBias += dbLeft - dbRight; midFreqDbBias += dbLeft - dbRight; } hiFreqDbBias = 0.0; for (int i = midBound; i < avSpectrumR.Length; i++) { double dbLeft = 20 * Math.Log10(avSpectrumL[i]); double dbRight = 20 * Math.Log10(avSpectrumR[i]); avDecibelBias += dbLeft - dbRight; hiFreqDbBias += dbLeft - dbRight; } avDecibelBias /= avSpectrumR.Length; lowFreqDbBias /= lowBound; midFreqDbBias /= midBound - lowBound; hiFreqDbBias /= avSpectrumR.Length - midBound; }
public void PowerSpectrumDensityTest() { var inputPath = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\"; var resultPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD.bmp"; var resultNoiseReducedPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD_NoiseReduced.bmp"; //var inputPath =Path.Combine(inputDir, "TrainSet"); // directory of the one-min recordings of one day (21 and 23 Apr - Black Rail Data) // check whether there is any file in the folder/subfolders if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0) { throw new ArgumentException("The folder of recordings is empty..."); } // get the nyquist value from the first wav file in the folder of recordings int nq = new AudioRecording(Directory.GetFiles(inputPath, "*.wav")[0]).Nyquist; int nyquist = nq; // 11025; int frameSize = 1024; int finalBinCount = 512; //256; // int hertzInterval = 1000; FreqScaleType scaleType = FreqScaleType.Linear; //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval); //var fst = freqScale.ScaleType; //var fst = FreqScaleType.Linear; //var freqScale = new FrequencyScale(fst); var settings = new SpectrogramSettings() { WindowSize = frameSize, WindowOverlap = 0.1028, //DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, //DoMelScale = false, MelBinCount = 256, DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, NoiseReductionType = NoiseReductionType.None, NoiseReductionParameter = 0.0, }; var attributes = new SpectrogramAttributes() { NyquistFrequency = nyquist, Duration = TimeSpan.FromMinutes(1440), }; List <double[]> psd = new List <double[]>(); foreach (string filePath in Directory.GetFiles(inputPath, "*.wav")) { FileInfo fileInfo = filePath.ToFileInfo(); // process the wav file if it is not empty if (fileInfo.Length != 0) { var recording = new AudioRecording(filePath); //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); //var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); // save the matrix // skip normalisation // skip mel settings.SourceFileName = recording.BaseName; var spectrogram = new EnergySpectrogram(settings, recording.WavReader); //var sonogram = new AmplitudeSpectrogram(settings, recording.WavReader); //var energySpectrogram = new EnergySpectrogram(sonoConfig, amplitudeSpectrogram.Data); //var energySpectrogram = new EnergySpectrogram(sonoConfig, recording.WavReader); //var energySpectrogram = new EnergySpectrogram(settings, recording.WavReader); // square the FFT coefficients to get an energy spectrogram // double[,] energySpectrogram = PowerSpectrumDensity.GetEnergyValues(amplitudeSpectrogram.Data); // RMS NORMALIZATION //double[,] normalizedValues = SNR.RmsNormalization(energySpectro.Data); //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data); // Median Noise Reduction //spectrogram.Data = PcaWhitening.NoiseReduction(spectrogram.Data); //spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data); //double[] psd = PowerSpectralDensity.GetPowerSpectrum(noiseReducedValues); //psd.Add(energySpectro.GetLogPsd()); psd.Add(MatrixTools.GetColumnAverages(spectrogram.Data)); //psd.Add(SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(normalizedValues)); //psd.Add(PowerSpectralDensity.GetPowerSpectrum(normalizedValues)); } } // writing psd matrix to csv file //Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\psd.csv"), psd.ToArray().ToMatrix()); //Image imagePsd = DecibelSpectrogram.DrawSpectrogramAnnotated(psd.ToArray().ToMatrix(), settings, attributes); //imagePsd.Save(resultPsdPath, ImageFormat.Bmp); var psdMatrix = psd.ToArray().ToMatrix(); // calculate the log of matrix var logPsd = MatrixTools.Matrix2LogValues(psdMatrix); Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd.csv"), logPsd); var image = DecibelSpectrogram.DrawSpectrogramAnnotated(logPsd, settings, attributes); image.Save(resultPsdPath); var noiseReducedLogPsd = PcaWhitening.NoiseReduction(logPsd); //SNR.NoiseReduce_Standard(logPsd); //SNR.NoiseReduce_Mean(logPsd, 0.0);//SNR.NoiseReduce_Median(logPsd, 0.0); // Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd_NoiseReduced.csv"), logPsd); var image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(noiseReducedLogPsd, settings, attributes); image2.Save(resultNoiseReducedPsdPath); //ImageTools.DrawMatrix(psd.ToArray().ToMatrix(), resultPath); //ImageTools.DrawReversedMatrix(psd.ToArray().ToMatrix(), resultPath); //var data = MatrixTools.Matrix2LogValues(psd.ToArray().ToMatrix()); //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(data); //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(logPsd); }