// OTHER CONSTANTS
//private const string ImageViewer = @"C:\Windows\system32\mspaint.exe";

/// <summary>
/// Do your analysis. This method is called once per segment (typically one-minute segments).
/// Builds the spectrogram configuration, runs the core <c>Analysis</c> method, writes the debug
/// spectrogram (when one was produced) and labels the detected events.
/// </summary>
/// <param name="recording">The audio segment to analyse. Its sample rate must be 22050 Hz.</param>
/// <param name="configuration">Recognizer settings; read into a <c>LitoriaWatjulumConfig</c>.</param>
/// <param name="segmentStartOffset">Start of this segment relative to the source recording; copied into every event.</param>
/// <param name="getSpectralIndexes">Not used by this recognizer.</param>
/// <param name="outputDirectory">Directory that receives the debug spectrogram image.</param>
/// <param name="imageWidth">Not used by this recognizer.</param>
/// <returns>The sonogram, hits, score plot and events; or null when the analysis has nothing to process.</returns>
/// <exception cref="InvalidOperationException">The recording is not sampled at 22050 Hz.</exception>
public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
{
    var recognizerConfig = new LitoriaWatjulumConfig();
    recognizerConfig.ReadConfigFile(configuration);

    //int maxOscilRate = (int)Math.Ceiling(1 / lwConfig.MinPeriod);

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    TimeSpan recordingDuration = recording.WavReader.Time;

    // this default framesize seems to work
    const int frameSize = 128;
    double windowOverlap = 0.0;

    // calculate the overlap instead
    //double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
    //    recording.SampleRate,
    //    frameSize,
    //    maxOscilRate);

    // i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,

        //set default values - ignore those set by user
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    // DO THE ANALYSIS
    var results = Analysis(recording, sonoConfig, recognizerConfig, MainEntry.InDEBUG, segmentStartOffset);
    if (results == null)
    {
        //nothing to process
        return null;
    }

    var sonogram = results.Item1;
    var hits = results.Item2;
    var scoreArray = results.Item3;
    var predictedEvents = results.Item4;
    var debugImage = results.Item5;

    // Analysis only renders a debug image when it is asked to (MainEntry.InDEBUG); otherwise Item5 is
    // null and there is nothing to save.
    if (debugImage != null)
    {
        // old way of creating a path:
        //var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.FileName), SpeciesName, "png", "DebugSpectrogram"));
        var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
        debugImage.Save(debugPath);
    }

    // Prune events here if required i.e. remove those below threshold score if this not already done. See other recognizers.
    foreach (var ae in predictedEvents)
    {
        // add additional info
        ae.Name = recognizerConfig.AbbreviatedSpeciesName;
        ae.SpeciesName = recognizerConfig.SpeciesName;
        ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
    }

    // do a recognizer TEST. Deliberately disabled; flip the condition when regenerating test results.
    if (false)
    {
        var testDir = new DirectoryInfo(outputDirectory.Parent.Parent.FullName);
        TestTools.RecognizerScoresTest(recording.BaseName, testDir, recognizerConfig.AnalysisName, scoreArray);
        AcousticEvent.TestToCompareEvents(recording.BaseName, testDir, recognizerConfig.AnalysisName, predictedEvents);
    }

    var plot = new Plot(this.DisplayName, scoreArray, recognizerConfig.EventThreshold);
    return new RecognizerResults()
    {
        Sonogram = sonogram,
        Hits = hits,
        Plots = plot.AsList(),
        Events = predictedEvents,
    };
}
/// <summary>
/// The CORE ANALYSIS METHOD.
/// Builds a standard spectrogram for the segment, looks for harmonics in a 64-bin band that starts at
/// MIN_HZ, scores each frame by harmonic intensity, and turns the scoring frames into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">Audio file holding the segment to analyse.</param>
/// <param name="configDict">
/// Settings as text. MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, DECIBEL_THRESHOLD, INTENSITY_THRESHOLD and
/// CALL_DURATION are required; the <c>AnalysisKeys.FrameLength</c> entry is optional (default 1024).
/// Numbers are parsed with the invariant culture.
/// </param>
/// <param name="segmentStartOffset">Start of the segment relative to the source recording; passed to every event.</param>
/// <returns>The sonogram, the hits matrix, the harmonic-intensity plot, the predicted events and the recording duration.</returns>
/// <exception cref="KeyNotFoundException">A required key is missing from <paramref name="configDict"/>.</exception>
/// <exception cref="FormatException">A configuration value is not a valid number.</exception>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Config values are machine-readable text, so they must not be parsed with the current UI culture
    // (a comma-decimal culture would otherwise misread values such as "0.5").
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //set default values
    int frameLength = 1024;
    string frameLengthText;
    if (configDict.TryGetValue(AnalysisKeys.FrameLength, out frameLengthText))
    {
        frameLength = int.Parse(frameLengthText, invariant);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"], invariant);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"], invariant);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"], invariant);
    double decibelThreshold = double.Parse(configDict["DECIBEL_THRESHOLD"], invariant); //dB
    double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double callDuration = double.Parse(configDict["CALL_DURATION"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    }; //default values config

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // with zero window overlap the frame rate (sr / window size) equals the bin width
    double framesPerSecond = freqBinWidth;

    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    int callSpan = (int)Math.Round(callDuration * framesPerSecond);

    //ii: DETECT HARMONICS
    var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);
    double[] dBArray = results.Item1;
    double[] intensity = results.Item2; //an array of periodicity scores
    double[] periodicity = results.Item3;

    //intensity = DataTools.filterMovingAverage(intensity, 3);
    int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < harmonicIntensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        //expect humans to have max < 1000 Hz
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < noiseBound; j++)
        {
            spectrum[j] = 0.0;
        }

        int maxIndex = DataTools.GetMaxIndex(spectrum);
        int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);

        // frames whose dominant frequency lies below 1200 Hz are scored zero
        double discount = 1.0;
        if (freqWithMaxPower < 1200)
        {
            discount = 0.0;
        }

        if (intensity[r] > harmonicIntensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        for (int c = minBin; c < maxbin; c++)
        {
            //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
            hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    double maxPossibleScore = 0.5;
    int halfCallSpan = callSpan / 2;
    var predictedEvents = new List<AcousticEvent>();
    for (int i = 0; i < rowCount; i++)
    {
        //assume one score position per crow call
        if (scoreArray[i] < 0.001)
        {
            continue;
        }

        // NOTE(review): startTime is negative for frames closer than half a call span to the segment
        // start - confirm that AcousticEvent tolerates this.
        double startTime = (i - halfCallSpan) / framesPerSecond;
        AcousticEvent ev = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
        ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
        ev.Score = scoreArray[i];
        ev.ScoreNormalised = ev.Score / maxPossibleScore; // normalised to the user supplied threshold

        //ev.Score_MaxPossible = maxPossibleScore;
        predictedEvents.Add(ev);
    }

    Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// ################ THE KEY ANALYSIS METHOD.
/// Cross-correlates two frequency bins of the segment's spectrogram (one just above 1500 Hz and one four
/// bins higher) over a sliding window, keeps windows whose dominant period lies within the configured
/// bounds, and converts the resulting intensity scores into acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">Audio file holding the segment to analyse.</param>
/// <param name="configDict">
/// Settings as text. INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION, MIN_PERIOD and MAX_PERIOD are
/// required. Numbers are parsed with the invariant culture.
/// </param>
/// <param name="segmentStartOffset">Start of the segment relative to the source recording; passed on to the events.</param>
/// <returns>
/// The sonogram, a hits matrix (left zero-filled by this method), the intensity plot, the predicted
/// events and the recording duration.
/// </returns>
/// <exception cref="KeyNotFoundException">A required key is missing from <paramref name="configDict"/>.</exception>
/// <exception cref="FormatException">A configuration value is not a valid number.</exception>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Config values are machine-readable text, so they must not be parsed with the current UI culture
    // (a comma-decimal culture would otherwise misread values such as "0.5").
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //set default values - ignore those set by user
    int frameSize = 128;
    double windowOverlap = 0.5;

    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"], invariant); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"], invariant); // seconds
    double minPeriod = double.Parse(configDict["MIN_PERIOD"], invariant); // seconds
    double maxPeriod = double.Parse(configDict["MAX_PERIOD"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    }; //default values config

    //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 256      17640       14.5ms          68.9        68.9    ms              hz              hz
    // 512      17640       29.0ms          34.4        34.4    ms              hz              hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms         8.6         8.6     7430ms          551hz           1100hz

    // The Xcorrelation-FFT technique requires the number of frames spanned to be a power of 2.
    // The two bins correlated are the one just above midHz and the one four bins higher.
    int midHz = 1500;
    int lowerBin = (int)(midHz / freqBinWidth) + 1; //because bin[0] = DC
    int upperBin = lowerBin + 4;
    int lowerHz = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
    int upperHz = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
    double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
    lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB
    upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB

    // Advance the window in steps of 1/40 second. Clamp to one frame so that a low frame rate
    // (fewer than 40 frames per second) cannot produce a zero step and a divide-by-zero below.
    int step = Math.Max(1, (int)(framesPerSecond / 40));
    int stepCount = rowCount / step;

    // number of frames in each cross-correlation window (a power of 2)
    int sampleLength = 32;
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // stop once a full-length window can no longer be extracted
        if (lowerSubarray == null || upperSubarray == null)
        {
            break;
        }

        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        int zeroCount = 2;
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        int maxId = DataTools.GetMaxIndex(spectrum);

        // convert maxID to period in seconds (maxId == 0 gives an infinite period, rejected below)
        double period = 2 * sampleLength / (double)maxId / framesPerSecond;
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length
        for (int j = 0; j < sampleLength; j++)
        {
            if (intensity[start + j] < spectrum[maxId])
            {
                intensity[start + j] = spectrum[maxId];
            }

            periodicity[start + j] = period;
        }
    }

    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 3);
    intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerHz,
        upperHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray, segmentStartOffset);
    var hits = new double[rowCount, colCount];

    var plots = new List<Plot>();

    //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
    //plots.Add(new Plot("lowerArray", DataTools.NormaliseMatrixValues(lowerArray), 0.25));
    //plots.Add(new Plot("upperArray", DataTools.NormaliseMatrixValues(upperArray), 0.25));
    //plots.Add(new Plot("intensity",  DataTools.NormaliseMatrixValues(intensity), intensityThreshold));
    plots.Add(new Plot("intensity", intensity, intensityThreshold));

    return Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// ################ THE KEY ANALYSIS METHOD for TRILLS
/// Averages the spectrogram over the configured lower and upper frequency bands, combines them into a
/// sum-minus-difference score, and derives two kinds of event from it: trills (candidate events that
/// also show an oscillation of acceptable period and intensity) and tinks (shorter events taken at a
/// decibel threshold 3 dB higher).
///
/// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
/// </summary>
/// <param name="recording">The audio to analyse. When null, a message is logged and null is returned.</param>
/// <param name="sonoConfig">Spectrogram configuration used for the analysis spectrogram.</param>
/// <param name="lwConfig">Recognizer thresholds, band limits, durations, periods and profile names.</param>
/// <param name="returnDebugImage">When true, a debug image with the score plots is drawn; otherwise the image is null.</param>
/// <param name="segmentStartOffset">Start of the segment relative to the source recording; passed on to the events.</param>
/// <returns>
/// A 512-sample-window spectrogram for display, the hits matrix (always null here), the oscillation
/// score array, the confirmed events and the debug image (null unless requested); or null when
/// <paramref name="recording"/> is null.
/// </returns>
private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaWatjulumConfig lwConfig,
    bool returnDebugImage,
    TimeSpan segmentStartOffset)
{
    double intensityThreshold = lwConfig.IntensityThreshold;
    double minDuration = lwConfig.MinDurationOfTrill; // seconds
    double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
    double minPeriod = lwConfig.MinPeriod; // seconds
    double maxPeriod = lwConfig.MaxPeriod; // seconds

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    //i: MAKE SONOGRAM
    //TimeSpan tsRecordingtDuration = recording.Duration();
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): frame rate is taken to equal the bin width, which presumably relies on the caller
    // passing a zero window overlap - confirm.
    double framesPerSecond = freqBinWidth;

    // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 4 * maxPeriod;

    // duration of DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    //int colCount = sonogram.Data.GetLength(1);

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
    //upperArray = DataTools.filterMovingAverage(upperArray, 3);

    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

    // Could smooth here rather than above. Above seemed slightly better?
    //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
    var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        lwConfig.DecibelThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // suppress baseline-removed values below 1.0 before searching for oscillations
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // LOOK FOR TRILL EVENTS
    // init the score array
    double[] scores = new double[rowCount];

    // var hits = new double[rowCount, colCount];
    double[,] hits = null;

    // init confirmed events
    var confirmedEvents = new List<AcousticEvent>();

    // a DCT window must lie wholly inside both arrays that it reads from / writes to
    int scanLimit = Math.Min(differenceScores.Length, scores.Length);

    // add names into the returned events
    foreach (var ae in predictedTrillEvents)
    {
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        int step = 2;
        double maximumIntensity = 0.0;

        // scan the event to get oscillation period and intensity
        for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
        {
            // The scan starts half a DCT window before the event, so for events near either end of the
            // segment the window overhangs the arrays. Skip those positions instead of indexing out of range.
            if (i < 0 || i + dctLength > scanLimit)
            {
                continue;
            }

            // Look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            double oscilFreq;
            double period;
            double intensity;
            Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

            bool periodWithinBounds = period > minPeriod && period < maxPeriod;

            //Console.WriteLine($"step={i}    period={period:f4}");

            if (!periodWithinBounds)
            {
                continue;
            }

            //lay down score for sample length
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // add abbreviatedSpeciesName into event
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
            ae.Score_MaxInEvent = maximumIntensity;
            ae.Profile = lwConfig.ProfileNames[0];
            confirmedEvents.Add(ae);
        }
    }

    // LOOK FOR TINK EVENTS
    // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
    double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
    double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

    // want stronger threshold for tink because brief.
    double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
    var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lwConfig.LowerBandMinHz,
        lwConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        tinkDecibelThreshold,
        minDurationOfTink,
        maxDurationOfTink,
        segmentStartOffset);

    // Every candidate tink is currently accepted.
    // Prune the list of potential acoustic events here if required, for example using Cosine Similarity.
    foreach (var ae2 in predictedTinkEvents)
    {
        ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

        //ae2.Score_MaxInEvent = maximumIntensity;
        ae2.Profile = lwConfig.ProfileNames[1];
        confirmedEvents.Add(ae2);
    }

    var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
    Image debugImage = null;
    if (returnDebugImage)
    {
        // display a variety of debug score arrays
        double[] normalisedScores;
        double normalisedThreshold;
        DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
        DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
        debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // return new sonogram because it makes for more easy interpretation of the image
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,

        // the default window is HAMMING
        //WindowFunction = WindowFunctions.HANNING.ToString(),
        //WindowFunction = WindowFunctions.NONE.ToString(),

        // if do not use noise reduction can get a more sensitive recogniser.
        //NoiseReductionType = NoiseReductionType.NONE,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);
    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
public static void Main(Arguments arguments) { //1. set up the necessary files //DirectoryInfo diSource = arguments.Source.Directory; FileInfo fiSourceRecording = arguments.Source; FileInfo fiConfig = arguments.Config.ToFileInfo(); FileInfo fiImage = arguments.Output.ToFileInfo(); fiImage.CreateParentDirectories(); string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING"; string date = "# DATE AND TIME: " + DateTime.Now; LoggedConsole.WriteLine(title); LoggedConsole.WriteLine(date); LoggedConsole.WriteLine("# Input audio file: " + fiSourceRecording.Name); LoggedConsole.WriteLine("# Output image file: " + fiImage); //2. get the config dictionary Config configuration = ConfigFile.Deserialize(fiConfig); //below three lines are examples of retrieving info from Config config //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName]; //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false; //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold; //3 transfer conogram parameters to a dictionary to be passed around var configDict = new Dictionary <string, string>(); // #Resample rate must be 2 X the desired Nyquist. Default is that of recording. configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString(); configDict["FrameLength"] = configuration[AnalysisKeys.FrameLength] ?? "512"; int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512; // #Frame Overlap as fraction: default=0.0 configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0"; double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0; // #MinHz: 500 // #MaxHz: 3500 // #NOISE REDUCTION PARAMETERS configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true"; configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? 
"3.0"; configDict["ADD_AXES"] = configuration["ADD_AXES"] ?? "true"; configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true"; // 3: GET RECORDING var startOffsetMins = TimeSpan.Zero; var endOffsetMins = TimeSpan.Zero; FileInfo fiOutputSegment = fiSourceRecording; if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero)) { var buffer = new TimeSpan(0, 0, 0); fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav")); //This method extracts segment and saves to disk at the location fiOutputSegment. var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? AppConfigHelper.DefaultTargetSampleRate; AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment); } var recording = new AudioRecording(fiOutputSegment.FullName); // EXTRACT ENVELOPE and SPECTROGRAM var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap); // average absolute value over the minute recording ////double[] avAbsolute = dspOutput.Average; // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ################################## // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds); // double totalSeconds = wavDuration.TotalSeconds; // double[] signalEnvelope = dspOutput.Envelope; // double avSignalEnvelope = signalEnvelope.Average(); // double[] frameEnergy = dspOutput.FrameEnergy; // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds; // double binWidth = dspOutput.BinWidth; // int nyquistBin = dspOutput.NyquistBin; // dspOutput.WindowPower, // dspOutput.FreqBinWidth int nyquistFreq = dspOutput.NyquistFreq; double epsilon = recording.Epsilon; // i: prepare amplitude spectrogram double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram. 
var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData)); // ii: prepare decibel spectrogram prior to noise removal double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon); decibelSpectrogramdata = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata); var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata)); // iii: Calculate background noise spectrum in decibels // Calculate noise value for each freq bin. double sdCount = 0.0; // number of SDs above the mean for noise removal var decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount); // DataTools.writeBarGraph(dBProfile.NoiseMode); // iv: Prepare noise reduced spectrogram decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds); //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold); var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata)); // prepare new sonogram config and draw second image going down different code pathway var config = new SonogramConfig { MinFreqBand = 0, MaxFreqBand = 10000, NoiseReductionType = SNR.KeyToNoiseReductionType("Standard"), NoiseReductionParameter = 1.0, WindowSize = frameSize, WindowOverlap = windowOverlap, }; //var mfccConfig = new MfccConfiguration(config); int bandCount = config.mfccConfig.FilterbankCount; bool doMelScale = config.mfccConfig.DoMelScale; int ccCount = config.mfccConfig.CcCount; int fftBins = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band int minHz = config.MinFreqBand ?? 0; int maxHz = config.MaxFreqBand ?? 
nyquistFreq; var standardSonogram = new SpectrogramStandard(config, recording.WavReader); var image4 = standardSonogram.GetImage(); // TODO next line crashes - does not produce cepstral sonogram. //SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader); //Image image5 = cepSng.GetImage(); //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict); //var image = mti.GetImage(); //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config); //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict); //prepare sonogram images var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false)); protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond)); protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width)); protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram)); var image6 = protoImage6.GetImage(); var list = new List <Image <Rgb24> >(); list.Add(image1); // amplitude spectrogram list.Add(image2); // decibel spectrogram before noise removal list.Add(image3); // decibel spectrogram after noise removal list.Add(image4); // second version of noise reduced spectrogram //list.Add(image5); // ceptral sonogram list.Add(image6.CloneAs <Rgb24>()); // multitrack image Image finalImage = ImageTools.CombineImagesVertically(list); finalImage.Save(fiImage.FullName); ////2: NOISE REMOVAL //double[,] originalSg = sonogram.Data; //double[,] mnr = sonogram.Data; //mnr = ImageTools.WienerFilter(mnr, 3); //double backgroundThreshold = 4.0; //SETS MIN DECIBEL BOUND //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold); //double ConfigRange = 70; //sets the the max dB //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange); ////3: Spectral tracks sonogram //byte[,] binary = 
MatrixTools.IdentifySpectralRidges(mnr); //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10); //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary); ////binary = MatrixTools.PickOutLines(binary); //syntactic approach //sonogram.SetBinarySpectrum(binary); ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_tracks.png"; //image.Save(fn); //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn); //3: prepare image of spectral peaks sonogram //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_peaks.png"; //image.Save(fn); //LoggedConsole.WriteLine("Spectral peaks sonogram to file: " + fn); //4: Sobel approach //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange); //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); //fn = outputFolder + wavFileName + "_sobel.png"; //image.Save(fn); //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn); // I1.txt contains the sonogram matrix produced by matlab //string matlabFile = 
@"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt"; //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166); //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]); //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]); // TODO put this back once sonogram issues resolved /* * LoggedConsole.WriteLine("START: AED"); * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix); * LoggedConsole.WriteLine("END: AED"); * * * //set up static variables for init Acoustic events * //AcousticEvent. doMelScale = config.DoMelScale; * AcousticEvent.FreqBinCount = config.FreqBinCount; * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount; * // int minF = (int)config.MinFreqBand; * // int maxF = (int)config.MaxFreqBand; * AcousticEvent.FrameDuration = config.GetFrameOffset(); * * * var events = new List<EventPatternRecog.Rectangle>(); * foreach (Oblong o in oblongs) * { * var e = new AcousticEvent(o); * events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq)); * //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq); * } * * LoggedConsole.WriteLine("# AED events: " + events.Count); * * LoggedConsole.WriteLine("START: EPR"); * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events); * LoggedConsole.WriteLine("END: EPR"); * * var eprEvents = new List<AcousticEvent>(); * foreach (EventPatternRecog.Rectangle r in eprRects) * { * var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false); * LoggedConsole.WriteLine(ae.WriteProperties()); * eprEvents.Add(ae); * } * * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png"); * * bool doHighlightSubband = false; bool add1kHzLines = true; * var image = 
new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines)); * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration)); * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width)); * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram)); * image.AddEvents(eprEvents); * image.Save(outputFolder + wavFileName + ".png"); */ LoggedConsole.WriteLine("\nFINISHED!"); }
} //Analysis()

/// <summary>
/// Scans a fixed band of 32 frequency bins, starting at <paramref name="minHz"/>, for harmonic
/// (evenly spaced bar) structure and turns the per-frame scores into acoustic events.
/// </summary>
/// <param name="recording">Recording whose samples are framed into an amplitude spectrogram.</param>
/// <param name="intensityThreshold">Threshold handed to the bar detector and to the event converter; it also gates which frames are written to the hits matrix.</param>
/// <param name="minHz">Lower edge (Hz) of the band that is scanned.</param>
/// <param name="minFormantgap">Exclusive lower bound on the detected period once converted to Hz.</param>
/// <param name="maxFormantgap">Exclusive upper bound on the detected period once converted to Hz.</param>
/// <param name="minDuration">Minimum event duration forwarded to the event converter.</param>
/// <param name="windowSize">Frame length in samples.</param>
/// <param name="windowOverlap">Fractional overlap between consecutive frames.</param>
/// <param name="segmentStartOffset">Start offset of this segment, forwarded to the event converter.</param>
/// <returns>
/// A tuple of: the sonogram built from the amplitude spectrogram, the hits matrix (always null on return),
/// the smoothed score array and the list of predicted events.
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> DetectHarmonics(
    AudioRecording recording,
    double intensityThreshold,
    int minHz,
    int minFormantgap,
    int maxFormantgap,
    double minDuration,
    int windowSize,
    double windowOverlap,
    TimeSpan segmentStartOffset)
{
    // The cross-correlation/FFT technique needs the number of scanned bins to be a power of two.
    // e.g. sr=17640, window=1024: 64 bins span ~1100 Hz and 128 bins span ~2200 Hz above minHz.
    const int scanBinCount = 32;

    // Number of low bins zeroed by the detector, to remove low-frequency content that dominates the spectrum.
    const int zeroBinCount = 5;
    const int smoothingWindow = 11;

    // Arbitrarily long: the duration of machine noise must not be restricted.
    const double maxDuration = 100000.0;

    // i: FRAME GEOMETRY
    int sampleRate = recording.SampleRate;
    double hzPerBin = sampleRate / (double)windowSize;
    double secondsPerFrame = windowSize / (double)sampleRate;
    double secondsPerStep = secondsPerFrame * (1 - windowOverlap); // seconds between starts of consecutive frames
    double framesPerSecond = 1 / secondsPerStep;

    // ii: AMPLITUDE SPECTROGRAM
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var dsp = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
        recording.WavReader.Samples,
        sampleRate,
        epsilon,
        windowSize,
        windowOverlap);
    double[] frameAverages = dsp.Average; // average absolute value per frame
    double[,] amplitudeSpectrogram = dsp.AmplitudeSpectrogram; // column zero is the DC value and is ignored
    double windowPower = dsp.WindowPower;

    int firstBin = (int)Math.Round(minHz / hzPerBin);
    int maxHz = (int)Math.Round(minHz + (scanBinCount * hzPerBin));
    int frameCount = amplitudeSpectrogram.GetLength(0);
    int binCount = amplitudeSpectrogram.GetLength(1);
    int lastBin = firstBin + scanBinCount;
    double[,] band = MatrixTools.Submatrix(amplitudeSpectrogram, 0, firstBin + 1, frameCount - 1, lastBin);

    // iii: DETECT HARMONICS
    var detection = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(band, intensityThreshold, zeroBinCount);
    double[] intensity = detection.Item1;
    double[] periodicity = detection.Item2;

    double[] scoreArray = new double[intensity.Length];
    double[,] hits = new double[frameCount, binCount];
    for (int frame = 0; frame < frameCount; frame++)
    {
        double period = periodicity[frame];
        double strength = intensity[frame];

        if (strength > intensityThreshold)
        {
            double relativePeriod = period / scanBinCount / 2;
            for (int bin = firstBin; bin < lastBin; bin++)
            {
                hits[frame, bin] = relativePeriod;
            }
        }

        double hertzPeriod = period * hzPerBin;
        bool gapIsPlausible = hertzPeriod > minFormantgap && hertzPeriod < maxFormantgap;
        if (gapIsPlausible)
        {
            // squaring enhances high scores with respect to low scores
            scoreArray[frame] = 2 * strength * strength;
        }
    }

    scoreArray = DataTools.filterMovingAverage(scoreArray, smoothingWindow);

    // iv: CONVERT TO ACOUSTIC EVENTS
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        framesPerSecond,
        hzPerBin,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    // The hits matrix is deliberately not returned.
    hits = null;

    // v: BUILD THE SONOGRAM TO RETURN from the existing amplitude spectrogram
    int bitsPerSample = recording.WavReader.BitsPerSample;
    TimeSpan duration = recording.Duration;
    NoiseReductionType noiseReduction = SNR.KeyToNoiseReductionType("STANDARD");
    var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
        recording.BaseName,
        windowSize,
        windowOverlap,
        bitsPerSample,
        windowPower,
        sampleRate,
        duration,
        noiseReduction,
        amplitudeSpectrogram);

    // one log-energy value per frame, normalised before it is attached to the sonogram
    var logEnvelope = new double[frameCount];
    for (int frame = 0; frame < frameCount; frame++)
    {
        logEnvelope[frame] = 2 * Math.Log10(frameAverages[frame]);
    }

    sonogram.DecibelsNormalised = DataTools.normalise(logEnvelope);

    return Tuple.Create(sonogram, hits, scoreArray, predictedEvents);
} //end Execute_HDDetect
/// <summary>
/// Produces a signal-to-noise report for one recording: a text file of signal, frame, frequency and
/// energy parameters, plus a PNG that stacks the decibel spectrogram on top of a Briggs noise-filtered one.
/// Both files are written to the output directory and named after the source file.
/// </summary>
/// <param name="arguments">Supplies the source recording, the config file and the output directory.</param>
public static void Execute(Arguments arguments)
{
    const string Title = "# DETERMINING SIGNAL TO NOISE RATIO IN RECORDING";
    string dateLine = $"# DATE AND TIME: {DateTime.Now}";
    Log.WriteLine(Title);
    Log.WriteLine(dateLine);
    Log.Verbosity = 1;

    var input = arguments.Source;
    var sourceFileName = input.Name;
    var outputDir = arguments.Output;
    var baseName = Path.GetFileNameWithoutExtension(input.FullName);
    var reportFile = Path.Combine(outputDir.FullName, baseName + ".txt").ToFileInfo();

    Log.WriteIfVerbose($"# Recording file: {input.FullName}");
    Log.WriteIfVerbose($"# Config file: {arguments.Config}");
    Log.WriteIfVerbose($"# Output folder ={outputDir.FullName}");
    FileTools.WriteTextFile(reportFile.FullName, $"{dateLine}\n# Recording file: {input.FullName}");

    // i: LOAD YAML CONFIGURATION
    Config configuration = ConfigFile.Deserialize(arguments.Config);

    // ii: SET SONOGRAM CONFIGURATION (defaults apply when a key is missing)
    var sonoConfig = new SonogramConfig();
    sonoConfig.SourceFName = input.FullName;
    sonoConfig.WindowSize = configuration.GetIntOrNull(AnalysisKeys.KeyFrameSize) ?? 512;
    sonoConfig.WindowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.5;
    sonoConfig.WindowFunction = configuration[AnalysisKeys.KeyWindowFunction];
    sonoConfig.NPointSmoothFFT = configuration.GetIntOrNull(AnalysisKeys.KeyNPointSmoothFft) ?? 256;
    sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType(configuration[AnalysisKeys.NoiseReductionType]);

    // These settings are still read from the config but nothing below consumes them.
    int minHz = configuration.GetIntOrNull("MIN_HZ") ?? 0;
    int maxHz = configuration.GetIntOrNull("MAX_HZ") ?? 11050;
    double segK1 = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K1") ?? 0;
    double segK2 = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K2") ?? 0;
    double latency = configuration.GetDoubleOrNull("K1_K2_LATENCY") ?? 0;
    double vocalGap = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0;
    double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0;

    // iii: CONVERT THE INPUT RECORDING INTO A WAV FILE in the output directory
    var convertParameters = new AudioUtilityRequest { TargetSampleRate = 17640 };
    var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));
    if (File.Exists(fileToAnalyse.FullName))
    {
        File.Delete(fileToAnalyse.FullName);
    }

    var convertedFileInfo = AudioFilePreparer.PrepareFile(input, fileToAnalyse, convertParameters, outputDir);

    // (A) FRAMING ARITHMETIC
    AudioRecording recording = new AudioRecording(fileToAnalyse.FullName);
    int signalLength = recording.WavReader.Samples.Length;
    TimeSpan wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);

    double frameSeconds = sonoConfig.WindowSize / (double)recording.SampleRate;
    TimeSpan frameDuration = TimeSpan.FromTicks((long)(frameSeconds * TimeSpan.TicksPerSecond));

    // samples between the starts of consecutive frames, before and after truncation to a whole sample
    double exactStep = sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap);
    int stepSize = (int)Math.Floor(exactStep);
    double stepSeconds = exactStep / recording.SampleRate;
    TimeSpan stepDuration = TimeSpan.FromTicks((long)(stepSeconds * TimeSpan.TicksPerSecond));
    double framesPerSecond = 1 / stepDuration.TotalSeconds;
    int frameCount = signalLength / stepSize;

    // (B) EXTRACT ENVELOPE AND SPECTROGRAM
    var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, sonoConfig.WindowSize, sonoConfig.WindowOverlap);

    // (C) SIGNAL WAVEFORM
    double[] signalEnvelope = dspOutput.Envelope;
    double avSignalEnvelope = signalEnvelope.Average();

    // (D) AMPLITUDE SPECTROGRAM
    double[,] amplitudeSpectrogram = dspOutput.AmplitudeSpectrogram;

    // (E) DECIBEL SPECTROGRAM derived from the amplitude spectrogram
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(
        dspOutput.AmplitudeSpectrogram,
        dspOutput.WindowPower,
        recording.SampleRate,
        epsilon);
    LoggedConsole.WriteLine("# Finished calculating decibel spectrogram.");

    // TEXT REPORT
    var report = new StringBuilder();
    report.AppendLine("\nSIGNAL PARAMETERS");
    report.AppendLine($"Signal Duration ={wavDuration}");
    report.AppendLine($"Sample Rate ={recording.SampleRate}");
    report.AppendLine($"Min Signal Value ={dspOutput.MinSignalValue}");
    report.AppendLine($"Max Signal Value ={dspOutput.MaxSignalValue}");
    report.AppendLine($"Max Absolute Ampl ={signalEnvelope.Max().ToString("F3")} (See Note 1)");
    report.AppendLine($"Epsilon Ampl (1 bit)={epsilon}");

    report.AppendLine("\nFRAME PARAMETERS");
    report.AppendLine($"Window Size ={sonoConfig.WindowSize}");
    report.AppendLine($"Frame Count ={frameCount}");
    report.AppendLine($"Envelope length={signalEnvelope.Length}");
    report.AppendLine($"Frame Duration ={frameDuration.TotalMilliseconds.ToString("F3")} ms");
    report.AppendLine($"Frame overlap ={sonoConfig.WindowOverlap}");
    report.AppendLine($"Step Size ={stepSize}");
    report.AppendLine($"Step duration ={stepDuration.TotalMilliseconds.ToString("F3")} ms");
    report.AppendLine($"Frames Per Sec ={framesPerSecond.ToString("F1")}");

    report.AppendLine("\nFREQUENCY PARAMETERS");
    report.AppendLine($"Nyquist Freq ={dspOutput.NyquistFreq} Hz");
    report.AppendLine($"Freq Bin Width ={dspOutput.FreqBinWidth.ToString("F2")} Hz");
    report.AppendLine($"Nyquist Bin ={dspOutput.NyquistBin}");

    report.AppendLine("\nENERGY PARAMETERS");
    double frameEnergy = dspOutput.FrameEnergy.Min();
    report.AppendLine($"Minimum dB / frame ={(10 * Math.Log10(frameEnergy)).ToString("F2")} (See Notes 2, 3 & 4)");
    frameEnergy = dspOutput.FrameEnergy.Max();
    report.AppendLine($"Maximum dB / frame ={(10 * Math.Log10(frameEnergy)).ToString("F2")}");

    report.AppendLine("\ndB NOISE SUBTRACTION");
    double noiseRange = 2.0;

    FileTools.Append2TextFile(reportFile.FullName, report.ToString());
    FileTools.Append2TextFile(reportFile.FullName, GetSNRNotes(noiseRange).ToString());

    // (F) IMAGE 1: decibel spectrogram
    Log.WriteLine("# Start drawing noise reduced sonograms.");
    TimeSpan xAxisInterval = TimeSpan.FromSeconds(1);
    int nyquist = recording.SampleRate / 2;
    int hzInterval = 1000;
    var image1 = DrawSonogram(deciBelSpectrogram, wavDuration, xAxisInterval, stepDuration, nyquist, hzInterval);

    // (H) IMAGE 2: Briggs noise removal applied to the amplitude spectrogram
    int percentileBound = 20; // low energy percentile for noise removal
    double binaryThreshold = 0.4; // works for lower SNR recordings (0.6 suits higher SNR recordings)
    double[,] mask = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetMask(
        amplitudeSpectrogram,
        percentileBound,
        binaryThreshold);

    string title = "TITLE";
    var image2 = NoiseRemoval_Briggs.DrawSonogram(
        mask,
        wavDuration,
        xAxisInterval,
        stepDuration,
        nyquist,
        hzInterval,
        title);

    // Stack both images and save them next to the text report.
    var combinedImage = ImageTools.CombineImagesVertically(image1, image2);
    string imagePath = Path.Combine(outputDir.FullName, baseName + ".png");
    combinedImage.Save(imagePath);

    Log.WriteLine($"# Finished recording:- {input.Name}");
}
/// <summary>
/// Runs the recognizer over one audio segment (typically one minute long), once for every profile
/// found in the config file, and pools the events and score plots from all profiles.
/// </summary>
/// <param name="recording">The segment to analyse; its sample rate must be 22050 Hz.</param>
/// <param name="configuration">Recognizer configuration; it must contain a profiles object.</param>
/// <param name="segmentStartOffset">Start offset of this segment, stamped onto each retained event.</param>
/// <param name="getSpectralIndexes">Not used by this recognizer.</param>
/// <param name="outputDirectory">Directory used to compose the debug image path.</param>
/// <param name="imageWidth">Must not be null; the value itself is not used here.</param>
/// <returns>
/// Null as soon as the analysis of any profile returns null; otherwise the sonogram of the last profile
/// analysed, one plot per profile and every event scoring above that profile's event threshold.
/// </returns>
public override RecognizerResults Recognize(
    AudioRecording recording,
    Config configuration,
    TimeSpan segmentStartOffset,
    Lazy<IndexCalculateResult[]> getSpectralIndexes,
    DirectoryInfo outputDirectory,
    int? imageWidth)
{
    // This frame size seems to work for Lewin's Rail. The window overlap is never taken from the
    // user: it is derived per profile below.
    const int frameSize = 512;

    if (!imageWidth.HasValue)
    {
        throw new ArgumentNullException(nameof(imageWidth));
    }

    if (recording.WavReader.SampleRate != 22050)
    {
        throw new InvalidOperationException("Requires a 22050Hz file");
    }

    TimeSpan recordingDuration = recording.WavReader.Time;

    if (!ConfigFile.HasProfiles(configuration))
    {
        throw new ConfigFileException("The Config file for L.pectoralis must contain a profiles object.");
    }

    string[] profileNames = ConfigFile.GetProfileNames(configuration);

    var recognizerConfig = new LewinsRailConfig();
    var keptEvents = new List<AcousticEvent>();
    var profilePlots = new List<Plot>();
    BaseSonogram latestSonogram = null;

    foreach (var profileName in profileNames)
    {
        Log.Debug($"Reading profile <{profileName}>.");
        recognizerConfig.ReadConfigFile(configuration, profileName);

        // Oscillations faster than this rate are ignored; the overlap is chosen to suit it.
        int maxOscilRate = (int)Math.Ceiling(1 / recognizerConfig.MinPeriod);
        double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
            recording.SampleRate,
            frameSize,
            maxOscilRate);

        // Sonogram settings are fixed here and ignore anything set by the user.
        // The window function is left at its default.
        var sonoConfig = new SonogramConfig
        {
            SourceFName = recording.BaseName,
            WindowSize = frameSize,
            WindowOverlap = windowOverlap,
            NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
        };

        var analysis = Analysis(recording, sonoConfig, recognizerConfig, this.ReturnDebugImage, segmentStartOffset);
        if (analysis == null)
        {
            // nothing to process
            return null;
        }

        latestSonogram = analysis.Item1;
        var scoreArray = analysis.Item3;
        var predictedEvents = analysis.Item4;
        var debugImage = analysis.Item5;

        if (debugImage == null)
        {
            Log.Debug("DebugImage is null, not writing file");
        }
        else if (MainEntry.InDEBUG)
        {
            // The path is still composed, but saving the image is currently disabled.
            var imageName = AnalysisResultName(recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
            var debugPath = outputDirectory.Combine(imageName);
        }

        // Keep only events scoring above the profile's threshold and label them.
        foreach (var candidate in predictedEvents)
        {
            if (candidate.Score > recognizerConfig.EventThreshold)
            {
                candidate.Name = recognizerConfig.AbbreviatedSpeciesName;
                candidate.SpeciesName = recognizerConfig.SpeciesName;
                candidate.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
                candidate.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                keptEvents.Add(candidate);
            }
        }

        // Boost very low scores for display: scale by four and cap at 1.0.
        for (int index = 0; index < scoreArray.Length; index++)
        {
            scoreArray[index] = Math.Min(scoreArray[index] * 4, 1.0);
        }

        profilePlots.Add(new Plot(this.DisplayName, scoreArray, recognizerConfig.EventThreshold));
    }

    return new RecognizerResults()
    {
        Sonogram = latestSonogram,
        Hits = null,
        Plots = profilePlots,
        Events = keptEvents,
    };
}
/// <summary>
/// Detects grating-like events in one audio segment. The segment is loaded, its amplitude spectrogram
/// is passed to DetectGratingEvents in bands of 500 Hz, and every detection that spans at least eight
/// frames becomes an <see cref="AcousticEvent"/>.
/// </summary>
/// <param name="fiSegmentOfSourceFile">Audio segment to analyse.</param>
/// <param name="configDict">Analysis settings; only the intensity threshold entry is read here. It is parsed with the invariant culture.</param>
/// <param name="diOutputDir">Directory handed to AudioRecording.GetAudioRecording.</param>
/// <param name="opFileName">File name handed to AudioRecording.GetAudioRecording.</param>
/// <param name="segmentStartOffset">Start offset of this segment, forwarded to each acoustic event.</param>
/// <returns>
/// Null when the recording cannot be obtained. Otherwise a tuple of: the sonogram built from the amplitude
/// spectrogram, a hits matrix holding each event's periodicity over its rectangle, the amplitude array
/// returned by the detector (for debugging), the acoustic events and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, TimeSpan> Analysis(
    FileInfo fiSegmentOfSourceFile,
    Dictionary<string, string> configDict,
    DirectoryInfo diOutputDir,
    string opFileName,
    TimeSpan segmentStartOffset)
{
    // fixed analysis settings
    const int bandWidth = 500; // detect bars in bands of this width (Hz)
    const int frameSize = 1024;
    const double windowOverlap = 0.0;
    const double minFrameCount = 8; // assumes that the minimum grid is 2 * 4 = 8 frames long
    const double severity = 0.1;

    // Config values are machine-readable text, so parse them independently of the current locale.
    // With the current culture, "0.01" is mis-read on locales that use a decimal comma.
    double intensityThreshold = double.Parse(
        configDict[key_INTENSITY_THRESHOLD],
        System.Globalization.CultureInfo.InvariantCulture);

    AudioRecording recording = AudioRecording.GetAudioRecording(fiSegmentOfSourceFile, RESAMPLE_RATE, diOutputDir.FullName, opFileName);
    if (recording == null)
    {
        LoggedConsole.WriteLine("############ WARNING: Recording could not be obtained - likely file does not exist.");
        return null;
    }

    int sr = recording.SampleRate;
    double binWidth = recording.SampleRate / (double)frameSize;
    double frameDuration = frameSize / (double)sr;
    double frameOffset = frameDuration * (1 - windowOverlap); // seconds between the start of each frame
    TimeSpan tsRecordingtDuration = recording.Duration;
    int colStep = (int)Math.Round(bandWidth / binWidth); // band width expressed in frequency bins

    // i: GET SONOGRAM AS MATRIX
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, sr, epsilon, frameSize, windowOverlap);
    double[] avAbsolute = results2.Average; // average absolute value per frame
    double[,] spectrogram = results2.AmplitudeSpectrogram; // column zero is the DC value and can be ignored
    double windowPower = results2.WindowPower;

    // ii: DETECT GRATING EVENTS
    var output = DetectGratingEvents(spectrogram, colStep, intensityThreshold);
    var amplitudeArray = output.Item2; // for debug purposes only

    // iii: CONVERT the detector's dictionaries to acoustic events and fill the hits matrix
    int rowCount = spectrogram.GetLength(0);
    int colCount = spectrogram.GetLength(1);
    var hitsMatrix = new double[rowCount, colCount];
    var acousticEvents = new List<AcousticEvent>();

    foreach (Dictionary<string, double> item in output.Item1)
    {
        int minRow = (int)item[key_START_FRAME];
        int maxRow = (int)item[key_END_FRAME];
        int frameCount = maxRow - minRow + 1;
        if (frameCount < minFrameCount)
        {
            continue; // only want events that are over a minimum length
        }

        int minCol = (int)item[key_MIN_FREQBIN];
        int maxCol = (int)item[key_MAX_FREQBIN];
        double periodicity = item[key_PERIODICITY];

        // Tighten the event's time bounds using the per-frame amplitude inside the event.
        double[] subarray = DataTools.Subarray(avAbsolute, minRow, maxRow - minRow + 1);
        int[] bounds = DataTools.Peaks_CropToFirstAndLast(subarray, severity);

        // NOTE(review): minRow has already been advanced by bounds[0] when maxRow is computed, so
        // maxRow ends up at (original minRow + bounds[0] + bounds[1]). If Peaks_CropToFirstAndLast
        // returns indices relative to the subarray, this overshoots by bounds[0] - confirm against
        // the helper before changing it. The clamp below keeps the row inside the matrix either way.
        minRow = minRow + bounds[0];
        maxRow = minRow + bounds[1];
        if (maxRow >= rowCount)
        {
            maxRow = rowCount - 1;
        }

        Oblong o = new Oblong(minRow, minCol, maxRow, maxCol);

        // frameCount is the event length measured before the bounds were tightened above.
        var ae = new AcousticEvent(segmentStartOffset, o, results2.NyquistFreq, frameSize, frameDuration, frameOffset, frameCount);
        ae.Name = string.Format("p={0:f0}", periodicity);
        ae.Score = item[key_SCORE];
        ae.ScoreNormalised = item[key_SCORE] / 0.5;
        acousticEvents.Add(ae);

        // display the event on the hits matrix
        for (int r = minRow; r < maxRow; r++)
        {
            for (int c = minCol; c < maxCol; c++)
            {
                hitsMatrix[r, c] = periodicity;
            }
        }
    }

    // iv: SET UP THE SONOGRAM TO RETURN. Use the existing amplitude spectrogram.
    int bitsPerSample = recording.WavReader.BitsPerSample;
    NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
    var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(recording.BaseName, frameSize, windowOverlap, bitsPerSample, windowPower, sr, tsRecordingtDuration, nrt, spectrogram);

    // one log-energy value per frame, normalised afterwards
    sonogram.DecibelsNormalised = new double[sonogram.FrameCount];
    for (int i = 0; i < sonogram.FrameCount; i++)
    {
        sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
    }

    sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);

    return Tuple.Create(sonogram, hitsMatrix, amplitudeArray, acousticEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// THE KEY ANALYSIS METHOD.
/// Builds a sonogram, averages the configured lower and upper frequency bands per frame, converts the
/// smoothed sum-minus-difference of the two bands into candidate events, and confirms a candidate only
/// when a DCT over the difference scores finds an oscillation whose period lies strictly between the
/// configured minimum and maximum and whose intensity reaches the intensity threshold.
/// </summary>
/// <param name="recording">The recording to analyse; null is reported and yields a null result.</param>
/// <param name="sonoConfig">Configuration of the sonogram used for the analysis.</param>
/// <param name="lbConfig">Recognizer settings: thresholds, band limits, period and duration limits.</param>
/// <param name="drawDebugImage">When true a debug image with score plots is produced. Must be false for deployment.</param>
/// <param name="segmentStartOffset">Start offset of this segment, forwarded to the event converter.</param>
/// <returns>
/// Null when <paramref name="recording"/> is null. Otherwise a tuple of: a fresh sonogram made with a
/// 512-sample window and no overlap (returned because it makes the image easier to interpret), the hits
/// matrix (always null), the per-frame oscillation scores, the confirmed events and the debug image
/// (null unless requested).
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
    AudioRecording recording,
    SonogramConfig sonoConfig,
    LitoriaBicolorConfig lbConfig,
    bool drawDebugImage,
    TimeSpan segmentStartOffset)
{
    double decibelThreshold = lbConfig.DecibelThreshold; // dB
    double intensityThreshold = lbConfig.IntensityThreshold;

    if (recording == null)
    {
        LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
        return null;
    }

    // i: MAKE SONOGRAM
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // NOTE(review): sr / WindowSize equals the frame rate only when frames do not overlap; this
    // presumably relies on sonoConfig.WindowOverlap being zero - confirm against the caller.
    double framesPerSecond = freqBinWidth;

    // duration of the DCT in seconds - want it to be about 3X or 4X the expected maximum period
    double dctDuration = 3 * lbConfig.MaxPeriod;

    // duration of the DCT in frames
    int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

    // set up the cosine coefficients
    double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

    int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);

    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    // ii: SCORE ARRAYS. The difference scores are derived before the amplitude scores are smoothed.
    double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
    double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);
    amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
    differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

    // iii: CONVERT decibel sum-diff SCORES TO CANDIDATE ACOUSTIC EVENTS
    var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        amplitudeScores,
        lbConfig.LowerBandMinHz,
        lbConfig.UpperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        decibelThreshold,
        lbConfig.MinDuration,
        lbConfig.MaxDuration,
        segmentStartOffset);

    // zero difference scores below 1.0 before searching them for oscillations
    for (int i = 0; i < differenceScores.Length; i++)
    {
        if (differenceScores[i] < 1.0)
        {
            differenceScores[i] = 0.0;
        }
    }

    // iv: CONFIRM EVENTS BY THEIR OSCILLATION
    double[] scores = new double[rowCount];
    double[,] hits = null; // no hits matrix is produced by this recognizer
    var confirmedEvents = new List<AcousticEvent>();
    const int step = 2;

    foreach (var ae in predictedEvents)
    {
        int eventStart = ae.Oblong.RowTop;
        int eventWidth = ae.Oblong.RowWidth;
        double maximumIntensity = 0.0;

        // Slide a DCT window over the event; each window starts half a window before the frame it tests.
        int firstWindow = eventStart - (dctLength / 2);
        int windowLimit = eventStart + eventWidth - (dctLength / 2);
        for (int i = firstWindow; i < windowLimit; i += step)
        {
            // A window that starts before frame zero, or runs past the last frame, cannot be written
            // into the score array: near either end of the recording the indexing below used to throw
            // IndexOutOfRangeException. Such windows are skipped.
            if (i < 0 || i + dctLength > scores.Length)
            {
                continue;
            }

            // look for oscillations in the difference array
            double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
            Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out _, out var period, out var intensity);

            bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;
            if (!periodWithinBounds)
            {
                continue;
            }

            // lay down the score over the whole window, keeping the larger value where windows overlap
            for (int j = 0; j < dctLength; j++)
            {
                if (scores[i + j] < intensity)
                {
                    scores[i + j] = intensity;
                }
            }

            if (maximumIntensity < intensity)
            {
                maximumIntensity = intensity;
            }
        }

        // name and keep only events whose strongest oscillation reaches the threshold
        if (maximumIntensity >= intensityThreshold)
        {
            ae.Name = "L.b";
            ae.Score_MaxInEvent = maximumIntensity;
            confirmedEvents.Add(ae);
        }
    }

    var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

    // DEBUG IMAGE for this recognizer only. MUST be set false for deployment.
    Image debugImage = null;
    if (drawDebugImage)
    {
        DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
        var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);

        DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
        var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

        var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
        debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
    }

    // Return a new sonogram because it makes for easier interpretation of the image.
    // The window function is left at its default.
    var returnSonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = 512,
        WindowOverlap = 0,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };
    BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

    return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
} //Analysis()
//Analyze()

/// <summary>
/// ################ THE KEY ANALYSIS METHOD
/// Builds a noise-reduced spectrogram of the audio segment, extracts spectral tracks from the
/// per-frame spectral maxima, scores each track for periodicity, converts the tracks to acoustic
/// events and labels every event using the frog-data table named in the configuration.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment file to analyse.</param>
/// <param name="analysisSettings">Settings whose <c>ConfigDict</c> supplies the frog-data CSV path and the threshold values.</param>
/// <param name="originalSampleRate">Sample rate of the original recording. Not used by the current implementation.</param>
/// <param name="segmentStartOffset">Start offset of the segment; forwarded to the track-to-event conversion.</param>
/// <returns>
/// A tuple of: the spectrogram, the hits matrix from the spectral-maxima search, the score plots,
/// the classified acoustic events, and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, AnalysisSettings analysisSettings, int originalSampleRate, TimeSpan segmentStartOffset)
{
    Dictionary<string, string> configDict = analysisSettings.ConfigDict;

    //set default values - ignore those set by user
    int frameSize = 32;
    double windowOverlap = 0.3;
    int xCorrelationLength = 256; //for Xcorrelation - 256 frames @801 = 320ms, almost 1/3 second.
    //int xCorrelationLength = 128; //for Xcorrelation - 128 frames @801 = 160ms, almost 1/6 second.
    //int xCorrelationLength = 64;  //for Xcorrelation - 64 frames @128 = 232ms, almost 1/4 second.
    //int xCorrelationLength = 16;  //for Xcorrelation - 16 frames @128 = 232ms, almost 1/4 second.
    double dBThreshold = 12.0;

    // Upper frequency bound (Hz) used both for track extraction and for the score plots.
    int maxFreq = 6000;

    // read file containing parameters of frog calls to a table
    var dt = CsvTools.ReadCSVToTable(configDict[key_FROG_DATA], true);

    // NOTE(review): the five values below are read from the configuration but are not used
    // anywhere else in this method. The reads are kept so that a missing key still fails here.
    double intensityThreshold = double.Parse(configDict[AnalysisKeys.IntensityThreshold]); //in 0-1
    double minDuration = double.Parse(configDict[AnalysisKeys.MinDuration]); // seconds
    double maxDuration = double.Parse(configDict[AnalysisKeys.MaxDuration]); // seconds
    double minPeriod = double.Parse(configDict[AnalysisKeys.MinPeriodicity]); // seconds
    double maxPeriod = double.Parse(configDict[AnalysisKeys.MaxPeriodicity]); // seconds

    // A constructor call never yields null, so no null check is needed here.
    var recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        //NoiseReductionType = SNR.Key2NoiseReductionType("NONE"),
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"), //must do noise removal
    };

    TimeSpan recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    //iii: GET TRACKS
    int nhLimit = 3; //limit of neighbourhood around maximum

    // NOTE(review): the result of this call is not used below; kept in case the helper has effects not visible here.
    var peaks = DataTools.GetPeakValues(sonogram.DecibelsPerFrame);

    var tuple = SpectralTrack.GetSpectralMaxima(sonogram.DecibelsPerFrame, sonogram.Data, dBThreshold, nhLimit);
    var maxFreqArray = tuple.Item1; //array (one element per frame) indicating which freq bin has max amplitude.
    var hitsMatrix = tuple.Item2;

    int herzOffset = 0;
    var tracks = SpectralTrack.GetSpectralTracks(
        maxFreqArray,
        framesPerSecond,
        freqBinWidth,
        herzOffset,
        SpectralTrack.MIN_TRACK_DURATION,
        SpectralTrack.MAX_INTRASYLLABLE_GAP,
        maxFreq);

    double severity = 0.5;
    double dynamicRange = 60; // deciBels above background noise. BG noise has already been removed from each bin.

    // convert sonogram to a list of frequency bin arrays
    var listOfFrequencyBins = SpectrogramTools.Sonogram2ListOfFreqBinArrays(sonogram, dynamicRange);
    int minFrameLength = SpectralTrack.FrameCountEquivalent(SpectralTrack.MIN_TRACK_DURATION, framesPerSecond);

    // Crop every track and drop those that become too short.
    // Iterating backwards keeps the remaining indices valid while removing by index.
    for (int i = tracks.Count - 1; i >= 0; i--)
    {
        tracks[i].CropTrack(listOfFrequencyBins, severity);
        if (tracks[i].Length < minFrameLength)
        {
            tracks.RemoveAt(i);
        }
    }

    // find any periodicity in each track and calculate its score.
    foreach (SpectralTrack track in tracks)
    {
        SpectralTrack.DetectTrackPeriodicity(track, xCorrelationLength, listOfFrequencyBins, sonogram.FramesPerSecond);
    }

    int rowCount = sonogram.Data.GetLength(0);
    int topBin = (int)Math.Round(maxFreq / freqBinWidth);
    var plots = CreateScorePlots(tracks, rowCount, topBin);

    //iv: CONVERT TRACKS TO ACOUSTIC EVENTS
    List<AcousticEvent> frogEvents = SpectralTrack.ConvertTracks2Events(tracks, segmentStartOffset);

    // v: GET FROG IDs
    foreach (AcousticEvent ae in frogEvents)
    {
        // Oscillation rate is the reciprocal of the event's periodicity.
        double oscRate = 1 / ae.Periodicity;

        // The classifier returns two names: the first goes to Name, the second to Name2.
        string[] names = ClassifyFrogEvent(ae.DominantFreq, oscRate, dt);
        ae.Name = names[0];
        ae.Name2 = names[1];
    }

    return Tuple.Create(sonogram, hitsMatrix, plots, frogEvents, recordingDuration);
} //Analysis()
} //Analyze()

/// <summary>
/// ################ THE KEY ANALYSIS METHOD
/// Averages the spectrogram over a lower and an upper frequency band, cross-correlates
/// 64-frame windows of the two band averages in steps of about one second, and keeps the
/// normalised peak as an intensity score wherever the implied period lies between the
/// configured minimum and maximum. The smoothed intensity is then converted to acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment file to analyse.</param>
/// <param name="configDict">Configuration values: band limits (Hz), thresholds, durations and periods (seconds).</param>
/// <param name="segmentStartOffset">Start offset of the segment; forwarded to the score-to-event conversion.</param>
/// <returns>
/// A tuple of: the spectrogram, an all-zero hits matrix sized like the spectrogram data,
/// the smoothed intensity scores, the predicted events, and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Configuration text is machine-readable, so parse it independently of the current culture.
    // Under a comma-decimal culture "0.5" would otherwise be read as 5.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //set default values - ignore those set by user
    int frameSize = 1024;
    double windowOverlap = 0.0;

    int upperBandMinHz = int.Parse(configDict[KeyUpperfreqbandBtm], invariant);
    int upperBandMaxHz = int.Parse(configDict[KeyUpperfreqbandTop], invariant);
    int lowerBandMinHz = int.Parse(configDict[KeyLowerfreqbandBtm], invariant);
    int lowerBandMaxHz = int.Parse(configDict[KeyLowerfreqbandTop], invariant);

    // NOTE(review): decibelThreshold is read but not used anywhere else in this method.
    double decibelThreshold = double.Parse(configDict[KeyDecibelThreshold], invariant); //dB
    double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold], invariant); //in 0-1
    double minDuration = double.Parse(configDict[KeyMinDuration], invariant); // seconds
    double maxDuration = double.Parse(configDict[KeyMaxDuration], invariant); // seconds
    double minPeriod = double.Parse(configDict[KeyMinPeriod], invariant); // seconds
    double maxPeriod = double.Parse(configDict[KeyMaxPeriod], invariant); // seconds

    // A constructor call never yields null, so no null check is needed here.
    var recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,

        //NoiseReductionType = SNR.Key2NoiseReductionType("NONE"),
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    TimeSpan recordingDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    // With zero window overlap the frame rate (sr / window size) is the same number as the bin width.
    double framesPerSecond = freqBinWidth;

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz
    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
    int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
    int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
    int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
    //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
    //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
    //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
    ////double[] envelope = results2.Item2;
    //double windowPower = results2.Item4;

    // One average value per frame for each of the two frequency bands.
    double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
    double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

    int step = (int)Math.Round(framesPerSecond); //take one second steps
    int stepCount = rowCount / step;
    int sampleLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
    int zeroCount = 3; //number of leading spectrum bins to suppress
    double[] intensity = new double[rowCount];
    double[] periodicity = new double[rowCount];

    //######################################################################
    //ii: DO THE ANALYSIS AND RECOVER SCORES
    for (int i = 0; i < stepCount; i++)
    {
        int start = step * i;
        double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
        double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

        // Stop as soon as a full-length window can no longer be extracted.
        if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
        {
            break;
        }

        var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
        for (int s = 0; s < zeroCount; s++)
        {
            spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
        }

        spectrum = DataTools.NormaliseArea(spectrum);
        int maxId = DataTools.GetMaxIndex(spectrum);
        double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
        if (period < minPeriod || period > maxPeriod)
        {
            continue;
        }

        // lay down score for sample length, keeping the larger score where windows overlap
        double peakValue = spectrum[maxId];
        for (int j = 0; j < sampleLength; j++)
        {
            int frame = start + j;
            if (intensity[frame] < peakValue)
            {
                intensity[frame] = peakValue;
            }

            periodicity[frame] = period;
        }
    }

    //######################################################################
    //iii: CONVERT SCORES TO ACOUSTIC EVENTS
    intensity = DataTools.filterMovingAverage(intensity, 5);
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        intensity,
        lowerBandMinHz,
        upperBandMaxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);
    CropEvents(predictedEvents, upperArray);

    // The hits matrix is returned empty (all zeros) by this method.
    var hits = new double[rowCount, colCount];

    return Tuple.Create(sonogram, hits, intensity, predictedEvents, recordingDuration);
} //Analysis()
/// <summary>
/// THIS IS THE CORE DETECTION METHOD
/// Detects the human voice.
/// Looks for harmonic bars in a 64-bin band of the spectrogram starting near MIN_HZ, keeps frames
/// whose bar spacing lies within the configured formant-gap range and whose strongest peaks
/// average to between 500 and 1000 Hz, and converts the resulting scores to acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">The audio segment file to analyse.</param>
/// <param name="configDict">Configuration values: MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION and, optionally, the frame length.</param>
/// <param name="segmentStartOffset">Start offset of the segment; forwarded to the score-to-event conversion.</param>
/// <returns>
/// A tuple of: the spectrogram, the hits matrix, a plot of the raw intensity scores,
/// the predicted events, and the duration of the recording.
/// </returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    // Configuration text is machine-readable, so parse it independently of the current culture.
    // Under a comma-decimal culture "0.5" would otherwise be read as 5.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //set default values
    int frameLength = 1024;
    if (configDict.TryGetValue(AnalysisKeys.FrameLength, out var frameLengthText))
    {
        frameLength = int.Parse(frameLengthText, invariant);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"], invariant);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"], invariant);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"], invariant);
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"], invariant); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"], invariant); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        //default values config
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    var tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    //#############################################################################################################################################
    //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
    // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
    // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
    // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz
    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);
    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1;
    double[] periodicity = results.Item2; //an array of periodicity scores

    //intensity = DataTools.filterMovingAverage(intensity, 3);

    // Only bins between 100 Hz and 3000 Hz are searched for the strongest peaks.
    int lowerHumanMaxBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 3000 hz

    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < intensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < lowerHumanMaxBound; j++)
        {
            spectrum[j] = 0.0;
        }

        for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
        {
            spectrum[j] = 0.0;
        }

        // Take the two largest peaks (the first is zeroed before the second search) and average their bins.
        double[] peakvalues = DataTools.GetPeakValues(spectrum);
        int maxIndex1 = DataTools.GetMaxIndex(peakvalues);
        peakvalues[maxIndex1] = 0.0;
        int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
        int avMaxBin = (maxIndex1 + maxIndex2) / 2;

        //int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
        int freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);

        // The score is kept only when the dominant frequency lies in 500-1000 Hz (inclusive); otherwise it is zeroed.
        bool inExpectedBand = freqWithMaxPower >= 500 && freqWithMaxPower <= 1000;
        double discount = inExpectedBand ? 1.0 : 0.0;

        //set scoreArray[r] - ignore locations with low intensity
        if (intensity[r] > intensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;

        // The value depends only on the row, so compute it once per row.
        //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
        double relativePeriod = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        for (int c = minBin; c < maxbin; c++)
        {
            hits[r, c] = relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    //remove isolated speech events - expect humans to talk like politicians
    //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
    Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);
    return Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration);
} //Analysis()