} // GetSpectralMaxima()

/// <summary>
/// THIS METHOD CALLED ONLY FROM THE Frogs.CS class.
/// Returns an array showing which freq bin in each frame has the maximum amplitude.
/// However only returns values for those frames in the neighbourhood of an envelope peak.
/// NOTE(review): the neighbourhood loop covers offsets [-nhLimit, nhLimit), i.e. it is
/// asymmetric and excludes +nhLimit — confirm whether an inclusive upper bound was intended.
/// </summary>
/// <param name="decibelsPerFrame">per-frame energy envelope (dB)</param>
/// <param name="spectrogram">spectrogram matrix: frames x freq bins</param>
/// <param name="threshold">minimum dB value for a peak to be recorded</param>
/// <param name="nhLimit">half-width (in frames) of the neighbourhood searched around an envelope peak</param>
/// <returns>tuple of (dominant freq bin per frame, hits matrix with 1.0 at each recorded peak)</returns>
public static Tuple<int[], double[,]> GetSpectralMaxima(double[] decibelsPerFrame, double[,] spectrogram, double threshold, int nhLimit)
{
    int frameCount = spectrogram.GetLength(0);
    int binCount = spectrogram.GetLength(1);

    var envelopePeaks = DataTools.GetPeakValues(decibelsPerFrame);

    // one element per frame, indicating which freq bin carries the maximum amplitude
    var dominantBinPerFrame = new int[frameCount];
    var hits = new double[frameCount, binCount];

    for (int frame = nhLimit; frame < frameCount - nhLimit; frame++)
    {
        // only process frames whose envelope peak exceeds the threshold
        if (envelopePeaks[frame] < threshold)
        {
            continue;
        }

        // find local freq maxima in the neighbourhood; store in output array & hits matrix
        for (int offset = -nhLimit; offset < nhLimit; offset++)
        {
            int row = frame + offset;
            double[] spectrum = MatrixTools.GetRow(spectrogram, row);
            spectrum[0] = 0.0; // zero the DC bin just in case it is the maximum

            int peakBin = DataTools.GetMaxIndex(spectrum);
            if (spectrum[peakBin] <= threshold)
            {
                continue; // only record spectral peaks that are above threshold
            }

            dominantBinPerFrame[row] = peakBin;
            hits[row, peakBin] = 1.0;
        }
    }

    return Tuple.Create(dominantBinPerFrame, hits);
} // GetSpectralMaxima()
/// <summary>
/// ################ THE KEY ANALYSIS METHOD for frog calls.
/// Makes a noise-reduced sonogram, extracts spectral tracks, measures each track's
/// periodicity, converts tracks to acoustic events and classifies each event
/// against the frog-call reference table named in the config.
/// </summary>
/// <param name="fiSegmentOfSourceFile">audio segment to analyse</param>
/// <param name="analysisSettings">supplies the string config dictionary</param>
/// <param name="originalSampleRate">sample rate of the original recording (11.0-44.1 kHz)</param>
/// <param name="segmentStartOffset">offset of this segment within the source recording</param>
/// <returns>tuple of (sonogram, hits matrix, score plots, frog events, recording duration)</returns>
public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, AnalysisSettings analysisSettings, int originalSampleRate, TimeSpan segmentStartOffset)
{
    Dictionary<string, string> configDict = analysisSettings.ConfigDict;

    // set default values - ignore those set by user
    int frameSize = 32;
    double windowOverlap = 0.3;
    int xCorrelationLength = 256; // for Xcorrelation - 256 frames @801 = 320ms, almost 1/3 second
    double dBThreshold = 12.0;

    // read file containing parameters of frog calls into a table
    var dt = CsvTools.ReadCSVToTable(configDict[key_FROG_DATA], true);

    // The following values are parsed for early validation (a missing or malformed key
    // throws here) but are not otherwise used by this analysis at present.
    double intensityThreshold = double.Parse(configDict[AnalysisKeys.IntensityThreshold]); // in 0-1
    double minDuration = double.Parse(configDict[AnalysisKeys.MinDuration]); // seconds
    double maxDuration = double.Parse(configDict[AnalysisKeys.MaxDuration]); // seconds
    double minPeriod = double.Parse(configDict[AnalysisKeys.MinPeriodicity]); // seconds
    double maxPeriod = double.Parse(configDict[AnalysisKeys.MaxPeriodicity]); // seconds

    // NOTE: removed the former null-check on this result - a constructor call can never return null in C#.
    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        //default values config
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"), //must do noise removal
    };

    TimeSpan tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;
    double frameOffset = sonoConfig.GetFrameOffset(sr);
    double framesPerSecond = 1 / frameOffset;
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    //iii: GET TRACKS
    int nhLimit = 3; //limit of neighbourhood around maximum
    var tuple = SpectralTrack.GetSpectralMaxima(sonogram.DecibelsPerFrame, sonogram.Data, dBThreshold, nhLimit);
    var maxFreqArray = tuple.Item1; //array (one element per frame) indicating which freq bin has max amplitude
    var hitsMatrix = tuple.Item2;

    // single source of truth for the 6000 Hz upper bound (was duplicated as maxFreq and MAX_FREQ_BOUND)
    const int maxFrequencyBound = 6000;
    int herzOffset = 0;
    var tracks = SpectralTrack.GetSpectralTracks(maxFreqArray, framesPerSecond, freqBinWidth, herzOffset, SpectralTrack.MIN_TRACK_DURATION, SpectralTrack.MAX_INTRASYLLABLE_GAP, maxFrequencyBound);

    double severity = 0.5;
    double dynamicRange = 60; // deciBels above background noise. BG noise has already been removed from each bin.

    // convert sonogram to a list of frequency bin arrays
    var listOfFrequencyBins = SpectrogramTools.Sonogram2ListOfFreqBinArrays(sonogram, dynamicRange);
    int minFrameLength = SpectralTrack.FrameCountEquivalent(SpectralTrack.MIN_TRACK_DURATION, framesPerSecond);

    // crop each track and discard those that end up too short.
    // Iterate backwards because we remove while iterating; RemoveAt(i) avoids the
    // linear equality scan that Remove(tracks[i]) performs.
    for (int i = tracks.Count - 1; i >= 0; i--)
    {
        tracks[i].CropTrack(listOfFrequencyBins, severity);
        if (tracks[i].Length < minFrameLength)
        {
            tracks.RemoveAt(i);
        }
    }

    // find any periodicity in each track and calculate its score
    foreach (SpectralTrack track in tracks)
    {
        SpectralTrack.DetectTrackPeriodicity(track, xCorrelationLength, listOfFrequencyBins, sonogram.FramesPerSecond);
    }

    int rowCount = sonogram.Data.GetLength(0);
    int topBin = (int)Math.Round(maxFrequencyBound / freqBinWidth);
    var plots = CreateScorePlots(tracks, rowCount, topBin);

    //iv: CONVERT TRACKS TO ACOUSTIC EVENTS
    List<AcousticEvent> frogEvents = SpectralTrack.ConvertTracks2Events(tracks, segmentStartOffset);

    //v: GET FROG IDs - classify each event by its dominant frequency and oscillation rate
    foreach (AcousticEvent ae in frogEvents)
    {
        double oscRate = 1 / ae.Periodicity;
        string[] names = ClassifyFrogEvent(ae.DominantFreq, oscRate, dt);
        ae.Name = names[0];
        ae.Name2 = names[1];
    }

    return Tuple.Create(sonogram, hitsMatrix, plots, frogEvents, tsRecordingtDuration);
} //Analysis()
/// <summary>
/// THIS IS THE CORE DETECTION METHOD.
/// Detects the human voice: builds a noise-reduced sonogram, scans a 64-bin band above
/// MIN_HZ for harmonic stacks via cross-correlation, scores frames whose formant gap lies
/// within [MIN_FORMANT_GAP, MAX_FORMANT_GAP] and whose dominant frequency is 500-1000 Hz,
/// then converts the score array to acoustic events.
/// </summary>
/// <param name="fiSegmentOfSourceFile">audio segment to analyse</param>
/// <param name="configDict">requires keys MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP,
/// INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION; frame length key is optional</param>
/// <param name="segmentStartOffset">offset of this segment within the source recording</param>
/// <returns>tuple of (sonogram, hits matrix, intensity plot, predicted events, recording duration)</returns>
public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
{
    //set default values
    int frameLength = 1024;
    if (configDict.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
    }

    double windowOverlap = 0.0;
    int minHz = int.Parse(configDict["MIN_HZ"]);
    int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"]);
    int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"]);
    double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
    double minDuration = double.Parse(configDict["MIN_DURATION"]); // seconds
    double maxDuration = double.Parse(configDict["MAX_DURATION"]); // seconds

    AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

    //i: MAKE SONOGRAM
    SonogramConfig sonoConfig = new SonogramConfig
    {
        //default values config
        SourceFName = recording.BaseName,
        WindowSize = frameLength,
        WindowOverlap = windowOverlap,
        NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
    };

    var tsRecordingtDuration = recording.Duration;
    int sr = recording.SampleRate;
    double freqBinWidth = sr / (double)sonoConfig.WindowSize;

    //#############################################################################################################################################
    //window    sr      frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins   hz/128bins
    // 1024     22050   46.4ms          21.5        21.5    2944ms          1376hz      2752hz
    // 1024     17640   58.0ms          17.2        17.2    3715ms          1100hz      2200hz
    // 2048     17640   116.1ms         8.6         8.6     7430ms          551hz       1100hz
    //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
    //assuming sr=17640 and window=1024, then 64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
    //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
    int numberOfBins = 64;
    int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
    int maxbin = minBin + numberOfBins - 1;
    int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
    int rowCount = sonogram.Data.GetLength(0);
    int colCount = sonogram.Data.GetLength(1);

    // restrict the search to the band of interest: all frames, freq bins [minBin, maxbin]
    double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

    //ii: DETECT HARMONICS
    int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
    var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
    double[] intensity = results.Item1;
    double[] periodicity = results.Item2; //an array of periodicity scores
    //intensity = DataTools.filterMovingAverage(intensity, 3);

    //expect humans to have max power >100 and < 1000 Hz. Set these bounds
    int lowerHumanMaxBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise
    int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 3000 hz (NOTE(review): an earlier comment said 2500 - the code uses 3000)
    double[] scoreArray = new double[intensity.Length];
    for (int r = 0; r < rowCount; r++)
    {
        if (intensity[r] < intensityThreshold)
        {
            continue;
        }

        //ignore locations with incorrect formant gap
        double herzPeriod = periodicity[r] * freqBinWidth;
        if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
        {
            continue;
        }

        //find freq having max power and use info to adjust score.
        //zero the spectrum outside [lowerHumanMaxBound, upperHumanMaxBound) before peak-picking
        double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
        for (int j = 0; j < lowerHumanMaxBound; j++)
        {
            spectrum[j] = 0.0;
        }

        for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
        {
            spectrum[j] = 0.0;
        }

        //average the bin indices of the two largest spectral peaks to estimate the dominant frequency
        double[] peakvalues = DataTools.GetPeakValues(spectrum);
        int maxIndex1 = DataTools.GetMaxIndex(peakvalues);
        peakvalues[maxIndex1] = 0.0; //zero the first peak so GetMaxIndex finds the second
        int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
        int avMaxBin = (maxIndex1 + maxIndex2) / 2;
        //int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
        int freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);

        //discount (zero) the score unless the dominant frequency lies in the human voice band 500-1000 Hz
        double discount = 1.0;
        if (freqWithMaxPower > 1000)
        {
            discount = 0.0;
        }
        else if (freqWithMaxPower < 500)
        {
            discount = 0.0;
        }

        //set scoreArray[r] - ignore locations with low intensity
        if (intensity[r] > intensityThreshold)
        {
            scoreArray[r] = intensity[r] * discount;
        }
    }

    //transfer info to a hits matrix.
    var hits = new double[rowCount, colCount];
    double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits
    for (int r = 0; r < rowCount; r++)
    {
        if (scoreArray[r] < threshold)
        {
            continue;
        }

        double herzPeriod = periodicity[r] * freqBinWidth;
        for (int c = minBin; c < maxbin; c++)
        {
            //hits[r, c] = herzPeriod / (double)380; //divide by 380 to get a relativePeriod;
            hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap; //to get a relativePeriod;
        }
    }

    //iii: CONVERT TO ACOUSTIC EVENTS
    List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
        scoreArray,
        minHz,
        maxHz,
        sonogram.FramesPerSecond,
        freqBinWidth,
        intensityThreshold,
        minDuration,
        maxDuration,
        segmentStartOffset);

    //remove isolated speech events - expect humans to talk like politicians
    //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
    Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);

    return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
} //Analysis()