/// <summary>
/// Initializes a new instance of the MfccConfiguration class from the MFCC entries of a configuration dictionary.
/// </summary>
/// <param name="config">Configuration to read the filter-bank count, mel-scale flag, cepstral-coefficient count and delta flags from.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> is null.</exception>
public MfccConfiguration(ConfigDictionary config)
{
    // Fail with a descriptive exception instead of a NullReferenceException on the first read,
    // matching the guard used by Initialize(ConfigDictionary).
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    this.FilterbankCount = config.GetInt(ConfigKeys.Mfcc.Key_FilterbankCount);
    this.DoMelScale = config.GetBoolean(ConfigKeys.Mfcc.Key_DoMelScale);

    // number of cepstral coefficients
    this.CcCount = config.GetInt(ConfigKeys.Mfcc.Key_CcCount);
    this.IncludeDelta = config.GetBoolean(ConfigKeys.Mfcc.Key_IncludeDelta);
    this.IncludeDoubleDelta = config.GetBoolean(ConfigKeys.Mfcc.Key_IncludeDoubleDelta);
}
/// <summary>
/// Command-line entry point that was written as a wrapper around analyser.Analyze(analysisSettings).
/// It currently prepares the requested audio segment and then always throws, because the analysis
/// step was broken by later code updates.
/// </summary>
/// <param name="arguments">Parsed command-line arguments; supplies the source file, optional start offset and duration (seconds), and the analysis settings.</param>
/// <exception cref="NotImplementedException">Always thrown after the audio segment has been prepared.</exception>
public static void Execute(Arguments arguments)
{
    Contract.Requires(arguments != null);

    var (analysisSettings, segmentSettings) = arguments.ToAnalysisSettings();
    TimeSpan offsetStart = TimeSpan.FromSeconds(arguments.Start ?? 0);
    TimeSpan duration = TimeSpan.FromSeconds(arguments.Duration ?? 0);
    int resampleRate = ConfigDictionary.GetInt(AnalysisKeys.ResampleRate, analysisSettings.ConfigDict);

    // EXTRACT THE REQUIRED RECORDING SEGMENT
    FileInfo tempF = segmentSettings.SegmentAudioFile;
    if (tempF.Exists)
    {
        tempF.Delete();
    }

    // A zero duration means "process the entire file"; otherwise cut [offsetStart, offsetStart + duration].
    AudioUtilityRequest request = duration == TimeSpan.Zero
        ? new AudioUtilityRequest { TargetSampleRate = resampleRate }
        : new AudioUtilityRequest { TargetSampleRate = resampleRate, OffsetStart = offsetStart, OffsetEnd = offsetStart.Add(duration) };

    AudioFilePreparer.PrepareFile(arguments.Source, tempF, request, analysisSettings.AnalysisTempDirectoryFallback);

    // DO THE ANALYSIS
    // BROKEN! The analyser call and the post-processing of its results table (adding rank order,
    // start minute and segment duration columns, then writing the summary-indices CSV) used to follow here.
    // The statements that came after this throw were unreachable and dereferenced a null analyser,
    // so they have been removed; restore them together with a working IAnalyser2 implementation.
    throw new NotImplementedException("Broken in code updates");
}
/// <summary>
/// Populates this configuration from a <c>ConfigDictionary</c>.
/// Recording identity, framing, noise-reduction type, frequency band, endpoint-detection and MFCC
/// settings are read from <paramref name="config"/>; DoSnr is forced to true and DoFullBandwidth to false.
/// </summary>
/// <param name="config">Configuration values read from file.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> is null.</exception>
private void Initialize(ConfigDictionary config)
{
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    // RECORDING IDENTITY
    this.CallName = config.GetString(ConfigKeys.Recording.Key_RecordingCallName);
    this.SourceFName = config.GetString(ConfigKeys.Recording.Key_RecordingFileName);

    // The duration is optional: Duration is only assigned when "WAV_DURATION" yields a value.
    var wavDurationSeconds = config.GetDoubleNullable("WAV_DURATION");
    if (wavDurationSeconds != null)
    {
        this.Duration = TimeSpan.FromSeconds(wavDurationSeconds.Value);
    }

    // FRAMING PARAMETERS
    this.WindowSize = config.GetInt(ConfigKeys.Windowing.Key_WindowSize);
    this.WindowOverlap = config.GetDouble(ConfigKeys.Windowing.Key_WindowOverlap);

    // NOISE REDUCTION PARAMETERS
    this.DoSnr = true;

    // The configured text must match an enum member name exactly (case-sensitive parse).
    string noiseReductionName = config.GetString(AnalysisKeys.NoiseReductionType);
    object parsedNoiseReduction = Enum.Parse(typeof(NoiseReductionType), noiseReductionName);
    this.NoiseReductionType = (NoiseReductionType)parsedNoiseReduction;

    // FREQUENCY BAND PARAMETERS
    this.DoFullBandwidth = false;
    this.MinFreqBand = config.GetIntNullable(ConfigKeys.Mfcc.Key_MinFreq);
    this.MaxFreqBand = config.GetIntNullable(ConfigKeys.Mfcc.Key_MaxFreq);

    // Mid band = lower bound plus half of the band width.
    var halfBandWidth = (this.MaxFreqBand - this.MinFreqBand) / 2;
    this.MidFreqBand = this.MinFreqBand + halfBandWidth;

    // SEGMENTATION PARAMETERS
    EndpointDetectionConfiguration.SetConfig(config);

    // MFCC PARAMETERS
    this.DoMelScale = config.GetBoolean(ConfigKeys.Mfcc.Key_DoMelScale);
    this.mfccConfig = new MfccConfiguration(config);

    // Frames between acoustic vectors.
    this.DeltaT = config.GetInt(ConfigKeys.Mfcc.Key_DeltaT);
}
/// <summary>
/// Populates this configuration from a plain string dictionary.
/// DoSnr and DoFullBandwidth are both forced to true. Frame length, frame overlap, resample rate and
/// noise-reduction type are optional and fall back to 512, 0.0, 0 and NoiseReductionType.None.
/// Frequency-band limits, endpoint detection and MFCC settings are not read by this overload.
/// </summary>
/// <param name="configDict">Dictionary of config values; must contain the recording call name and file name keys.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="configDict"/> is null.</exception>
private void Initialize(Dictionary<string, string> configDict)
{
    // Same guard as the ConfigDictionary overload, so both fail the same way on a null argument.
    if (configDict == null)
    {
        throw new ArgumentNullException(nameof(configDict));
    }

    // Required entries: the indexer throws KeyNotFoundException when either key is missing.
    this.CallName = configDict[ConfigKeys.Recording.Key_RecordingCallName];
    this.SourceFName = configDict[ConfigKeys.Recording.Key_RecordingFileName];

    // FRAMING PARAMETERS
    this.WindowSize = 512; // default value
    if (configDict.ContainsKey(AnalysisKeys.FrameLength))
    {
        this.WindowSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, configDict);
    }

    this.WindowOverlap = 0.0; // default value
    if (configDict.ContainsKey(AnalysisKeys.FrameOverlap))
    {
        this.WindowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, configDict);
    }

    this.sampleRate = 0; // default value
    if (configDict.ContainsKey(AnalysisKeys.ResampleRate))
    {
        // Read with the same key constant that was tested above; previously a separate
        // "ResampleRate" literal was used here, which could drift from the constant.
        this.sampleRate = ConfigDictionary.GetInt(AnalysisKeys.ResampleRate, configDict);
    }

    // NOISE REDUCTION PARAMETERS
    this.DoSnr = true;
    this.NoiseReductionType = NoiseReductionType.None;
    if (configDict.ContainsKey(AnalysisKeys.NoiseReductionType))
    {
        // The configured text must match an enum member name exactly (case-sensitive parse).
        string noiseReductionType = configDict[AnalysisKeys.NoiseReductionType];
        this.NoiseReductionType = (NoiseReductionType)Enum.Parse(typeof(NoiseReductionType), noiseReductionType);
    }

    // FREQ BAND PARAMETERS
    this.DoFullBandwidth = true;
}
/// <summary>
/// Builds a <c>SonogramConfig</c> from a configuration file, falling back to default values when the file is missing.
/// </summary>
/// <param name="configFile">Path of the configuration file to read.</param>
/// <returns>
/// A configuration loaded from <paramref name="configFile"/> when it exists; otherwise a default-constructed one.
/// </returns>
public static SonogramConfig Load(string configFile)
{
    Log.WriteLine($"config file ={configFile}");

    if (File.Exists(configFile))
    {
        var loaded = new ConfigDictionary(configFile);

        // A positive VERBOSITY entry switches verbose logging on.
        bool verbose = loaded.GetInt("VERBOSITY") > 0;
        if (verbose)
        {
            Log.Verbosity = 1;
            Log.WriteIfVerbose("Verbosity set true in Application Config file.");
        }

        return new SonogramConfig(loaded);
    }

    // Missing file: report it and carry on with defaults.
    Log.WriteLine($"The configuration file <{configFile}> does not exist!");
    Log.WriteLine("Initialising application with default parameter values.");
    return new SonogramConfig();
}
// Analysis: runs the kiwi detector over one audio segment twice - once for the male frequency band and
// once for the female frequency band - and merges the two result sets.
// NOTE(review): the modifiers and return type of this method are not visible in this part of the file;
// the body returns System.Tuple.Create(sonogram, hits, scores, events, recording duration).
// Parameters:
//   fiSegmentOfSourceFile - audio segment to analyse; only its FullName is used to open the recording.
//   config                - string dictionary read through ConfigDictionary.GetInt/GetDouble for the male and
//                           female Hz bounds, frame length and overlap, min/max periodicity, event threshold
//                           and min/max event duration.
// Steps:
//   1. Read the parameters listed above from config.
//   2. Open the recording and build a SpectralSonogram from a default SonogramConfig overridden with the
//      source file name, frame length, frame overlap and NoiseReductionType.STANDARD.
//   3. Call DetectKiwi for the male band and name every resulting event "LSK(m)".
//   4. Call DetectKiwi for the female band and name every resulting event "LSK(f)".
//   5. Combine: the hit matrices are added with MatrixTools.AddMatrices, and the female events and score
//      arrays are appended to the male collections, which are then returned.
// NOTE(review): `recording == null` is tested immediately after `new AudioRecording(...)`; unless
// AudioRecording overloads ==, that branch (and its `return(null)`) cannot be taken - confirm and remove.
Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> config) { int minHzMale = ConfigDictionary.GetInt(LSKiwi1.key_MIN_HZ_MALE, config); int maxHzMale = ConfigDictionary.GetInt(LSKiwi1.key_MAX_HZ_MALE, config); int minHzFemale = ConfigDictionary.GetInt(LSKiwi1.key_MIN_HZ_FEMALE, config); int maxHzFemale = ConfigDictionary.GetInt(LSKiwi1.key_MAX_HZ_FEMALE, config); int frameLength = ConfigDictionary.GetInt(LSKiwi1.key_FRAME_LENGTH, config); double frameOverlap = ConfigDictionary.GetDouble(LSKiwi1.key_FRAME_OVERLAP, config); //double dctDuration = ConfigDictionary.GetDouble(LSKiwi1.key_DCT_DURATION, config); //double dctThreshold = ConfigDictionary.GetDouble(LSKiwi1.key_DCT_THRESHOLD, config); double minPeriod = ConfigDictionary.GetDouble(LSKiwi1.key_MIN_PERIODICITY, config); double maxPeriod = ConfigDictionary.GetDouble(LSKiwi1.key_MAX_PERIODICITY, config); double eventThreshold = ConfigDictionary.GetDouble(Keys.EVENT_THRESHOLD, config); double minDuration = ConfigDictionary.GetDouble(LSKiwi1.key_MIN_DURATION, config); //minimum event duration to qualify as species call double maxDuration = ConfigDictionary.GetDouble(LSKiwi1.key_MAX_DURATION, config); //maximum event duration to qualify as species call AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName); if (recording == null) { Console.WriteLine("AudioRecording == null. 
Analysis not possible."); return(null); } TimeSpan tsRecordingtDuration = recording.Duration(); //i: MAKE SONOGRAM SonogramConfig sonoConfig = new SonogramConfig(); //default values config sonoConfig.SourceFName = recording.FileName; sonoConfig.WindowSize = frameLength; sonoConfig.WindowOverlap = frameOverlap; sonoConfig.NoiseReductionType = NoiseReductionType.STANDARD; //MUST DO NOISE REMOVAL BaseSonogram sonogram = new SpectralSonogram(sonoConfig, recording.GetWavReader()); //DETECT MALE KIWI var resultsMale = DetectKiwi(sonogram, minHzMale, maxHzMale, /*dctDuration, dctThreshold,*/ minPeriod, maxPeriod, eventThreshold, minDuration, maxDuration); var scoresM = resultsMale.Item1; var hitsM = resultsMale.Item2; var predictedEventsM = resultsMale.Item3; foreach (AcousticEvent ev in predictedEventsM) { ev.Name = "LSK(m)"; } //DETECT FEMALE KIWI var resultsFemale = DetectKiwi(sonogram, minHzFemale, maxHzFemale, /* dctDuration, dctThreshold,*/ minPeriod, maxPeriod, eventThreshold, minDuration, maxDuration); var scoresF = resultsFemale.Item1; var hitsF = resultsFemale.Item2; var predictedEventsF = resultsFemale.Item3; foreach (AcousticEvent ev in predictedEventsF) { ev.Name = "LSK(f)"; } //combine the male and female results hitsM = MatrixTools.AddMatrices(hitsM, hitsF); foreach (AcousticEvent ev in predictedEventsF) { predictedEventsM.Add(ev); } foreach (double[] array in scoresF) { scoresM.Add(array); } return(System.Tuple.Create(sonogram, hitsM, scoresM, predictedEventsM, tsRecordingtDuration)); } //Analysis()
/// <summary>
/// Loads an audio file, extracts its envelope and amplitude spectrogram, and returns the rain indices
/// produced by RainIndices.GetIndices together with the audio duration.
/// </summary>
/// <param name="fiAudioFile">Audio file to analyse; only its full path is used to open the recording.</param>
/// <param name="analysisSettings">Supplies ConfigDict, from which the optional frame length, frame overlap and low/mid frequency bounds are read.</param>
/// <param name="originalFile">Metadata of the original recording; its SampleRate caps the Nyquist bin when the loaded audio has a higher Nyquist (up-sampling).</param>
/// <returns>A tuple of the index dictionary and the duration of the loaded audio.</returns>
public static Tuple<Dictionary<string, double>, TimeSpan> RainAnalyser(FileInfo fiAudioFile, AnalysisSettings analysisSettings, SourceMetadata originalFile)
{
    Dictionary<string, string> config = analysisSettings.ConfigDict;

    // Analysis parameters: defaults first, each overridden only when its key is configured.
    int frameSize = IndexCalculateConfig.DefaultWindowSize;
    double windowOverlap = 0.0;
    int lowFreqBound = 1000;
    int midFreqBound = 8000;

    if (config.ContainsKey(AnalysisKeys.FrameLength))
    {
        frameSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, config);
    }

    if (config.ContainsKey(key_LOW_FREQ_BOUND))
    {
        lowFreqBound = ConfigDictionary.GetInt(key_LOW_FREQ_BOUND, config);
    }

    if (config.ContainsKey(key_MID_FREQ_BOUND))
    {
        midFreqBound = ConfigDictionary.GetInt(key_MID_FREQ_BOUND, config);
    }

    if (config.ContainsKey(AnalysisKeys.FrameOverlap))
    {
        windowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, config);
    }

    // get recording segment
    AudioRecording recording = new AudioRecording(fiAudioFile.FullName);

    // Durations: whole recording, and the time step between successive frames.
    TimeSpan audioDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    double duration = frameSize * (1 - windowOverlap) / (double)recording.SampleRate;
    TimeSpan frameDuration = TimeSpan.FromTicks((long)(duration * TimeSpan.TicksPerSecond));

    // i: EXTRACT ENVELOPE and FFTs
    double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
    var signalextract = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, epsilon, frameSize, windowOverlap);
    double[] envelope = signalextract.Envelope;
    double[,] spectrogram = signalextract.AmplitudeSpectrogram; // amplitude spectrogram

    int nyquistFreq = recording.Nyquist;
    int nyquistBin = spectrogram.GetLength(1) - 1;
    double binWidth = nyquistFreq / (double)spectrogram.GetLength(1);

    // bin id of the boundary between the low and mid frequency spectrum
    int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);

    // If there has been up-sampling, use the bin of the original audio's Nyquist instead of the top bin.
    int originalAudioNyquist = originalFile.SampleRate / 2;
    if (recording.Nyquist > originalAudioNyquist)
    {
        nyquistBin = (int)Math.Floor(originalAudioNyquist / binWidth);
    }

    // vi: ACOUSTIC COMPLEXITY INDEX over the band [lowBinBound, nyquistBin].
    // NOTE(review): aci1 is never used or returned. The computation is retained because the helpers are
    // defined elsewhere and this call chain can throw on bad bounds/empty input - confirm and remove if unneeded.
    var subBandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, nyquistBin);
    double[] aciArray = AcousticComplexityIndex.CalculateACI(subBandSpectrogram);
    double aci1 = aciArray.Average();

    // ii: FRAME ENERGIES - convert signal to decibels and subtract background noise.
    // NOTE(review): dBarray is likewise unused; retained for the same reason as the ACI computation.
    double standardDeviationCount = 0.1; // number of noise SDs used for the noise threshold
    var results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signalextract.Envelope), standardDeviationCount);
    var dBarray = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

    // Column header for verbose console output. The format string now has one placeholder per
    // argument; previously it stopped at {11}, so "index1" and "index2" were silently dropped.
    if (Verbose)
    {
        LoggedConsole.WriteLine(
            "{0:d2}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}",
            "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
    }

    // The header StringBuilder that used to be created when WriteOutputFile was set has been removed:
    // it was never written anywhere or returned.
    Dictionary<string, double> dict = RainIndices.GetIndices(envelope, audioDuration, frameDuration, spectrogram, lowFreqBound, midFreqBound, binWidth);
    return Tuple.Create(dict, audioDuration);
}
/// <summary>
/// Renders a spectrogram image of an audio file by running the SoX executable configured in AppConfigHelper.SoxExe.
/// </summary>
/// <param name="fiAudio">Audio file passed to SoX as input.</param>
/// <param name="configDict">Optional sonogram settings (title, comment, axes, colour, quantisation).</param>
/// <param name="output">Image file SoX writes to; its directory is passed to the process runner.</param>
/// <exception cref="FileNotFoundException">Thrown when the configured SoX executable does not exist.</exception>
/// <remarks>
/// For the SoX command line options see http://sox.sourceforge.net/sox.html.
/// The active argument template only references the input path, the title and the output path
/// (placeholders {0}, {1} and {6}), with "-m", "-q 64" and "-l" fixed in the template.
/// The comment, axes, colour and quantisation strings are computed but not placed on the command line.
/// </remarks>
public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary<string, string> configDict, FileInfo output)
{
    var soxExecutable = new FileInfo(AppConfigHelper.SoxExe);
    if (!soxExecutable.Exists)
    {
        LoggedConsole.WriteLine("SOX ERROR: Path does not exist: <{0}>", soxExecutable.FullName);
        throw new FileNotFoundException("SOX ERROR: Path for executable does not exist.", soxExecutable.FullName);
    }

    // The executable path is quoted because it may contain a space.
    string quotedSoxPath = "\"" + AppConfigHelper.SoxExe + "\"";

    // -t text: image title, displayed above the spectrogram.
    string title = configDict.ContainsKey(AnalysisKeys.SonogramTitle)
        ? " -t " + configDict[AnalysisKeys.SonogramTitle]
        : string.Empty;

    // -c text: image comment, displayed below and to the left of the spectrogram.
    string comment = configDict.ContainsKey(AnalysisKeys.SonogramComment)
        ? " -c " + configDict[AnalysisKeys.SonogramComment]
        : string.Empty;

    // -r: raw spectrogram, i.e. axes and legends suppressed. It is the default here and is dropped
    // only when AddAxes is present and false.
    // NOTE(review): that looks inverted (AddAxes = false ends up showing axes) - confirm the intent.
    bool axesKeyIsFalse = configDict.ContainsKey(AnalysisKeys.AddAxes)
                          && !ConfigDictionary.GetBoolean(AnalysisKeys.AddAxes, configDict);
    string axes = axesKeyIsFalse ? string.Empty : "-r";

    // -m: monochrome spectrogram (SoX's own default is colour); dropped when colour is requested.
    bool wantsColour = configDict.ContainsKey(AnalysisKeys.SonogramColored)
                       && ConfigDictionary.GetBoolean(AnalysisKeys.SonogramColored, configDict);
    string coloured = wantsColour ? string.Empty : " -m ";

    // -q: number of intensity quantisation levels/colours.
    string quantisation = configDict.ContainsKey(AnalysisKeys.SonogramQuantisation)
        ? " -q " + ConfigDictionary.GetInt(AnalysisKeys.SonogramQuantisation, configDict)
        : " -q 64 ";

    // 64 grey scale, with time, freq and intensity scales.
    // NOTE(review): placeholders {2}..{5} are absent, so comment/axes/coloured/quantisation are ignored.
    const string SoxCommandLineArguments = " -V \"{0}\" -n spectrogram -m {1} -q 64 -l -o \"{6}\"";

    string commandLine = string.Format(
        SoxCommandLineArguments,
        fiAudio.FullName,
        title,
        comment,
        axes,
        coloured,
        quantisation,
        output.FullName);

    using (var process = new ProcessRunner(quotedSoxPath))
    {
        process.Run(commandLine, output.DirectoryName);
    }
}