/// <summary>
/// Initializes a new instance of the <see cref="MfccConfiguration"/> class by reading
/// the MFCC settings out of the supplied configuration dictionary.
/// </summary>
/// <param name="config">
/// Configuration source; queried for the filterbank count, mel-scale flag,
/// cepstral-coefficient count and the delta / double-delta flags.
/// </param>
public MfccConfiguration(ConfigDictionary config)
{
    // Filterbank layout.
    this.FilterbankCount = config.GetInt(ConfigKeys.Mfcc.Key_FilterbankCount);
    this.DoMelScale = config.GetBoolean(ConfigKeys.Mfcc.Key_DoMelScale);

    // Number of cepstral coefficients.
    this.CcCount = config.GetInt(ConfigKeys.Mfcc.Key_CcCount);

    // Which derivative features are requested.
    this.IncludeDelta = config.GetBoolean(ConfigKeys.Mfcc.Key_IncludeDelta);
    this.IncludeDoubleDelta = config.GetBoolean(ConfigKeys.Mfcc.Key_IncludeDoubleDelta);
}
/// <summary>
/// Populates this configuration from a <see cref="ConfigDictionary"/>.
/// Two flags are not read from the dictionary but hard-coded here:
/// <c>DoSnr = true</c> and <c>DoFullBandwidth = false</c>.
/// </summary>
/// <param name="config">Settings previously read from a config file.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> is null.</exception>
private void Initialize(ConfigDictionary config)
{
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    // RECORDING IDENTITY
    this.CallName = config.GetString(ConfigKeys.Recording.Key_RecordingCallName);
    this.SourceFName = config.GetString(ConfigKeys.Recording.Key_RecordingFileName);

    // The duration is optional; this.Duration is left untouched when the key yields no value.
    var wavDurationSeconds = config.GetDoubleNullable("WAV_DURATION");
    if (wavDurationSeconds.HasValue)
    {
        this.Duration = TimeSpan.FromSeconds(wavDurationSeconds.Value);
    }

    // FRAMING PARAMETERS
    this.WindowSize = config.GetInt(ConfigKeys.Windowing.Key_WindowSize);
    this.WindowOverlap = config.GetDouble(ConfigKeys.Windowing.Key_WindowOverlap);

    // NOISE REDUCTION PARAMETERS
    // DoSnr is always switched on here; it is not read from the config.
    this.DoSnr = true;

    // The configured text must match an enum member name exactly (case-sensitive parse).
    string noiseReductionName = config.GetString(AnalysisKeys.NoiseReductionType);
    this.NoiseReductionType = (NoiseReductionType)Enum.Parse(typeof(NoiseReductionType), noiseReductionName);

    // FREQUENCY BAND PARAMETERS
    // DoFullBandwidth is always switched off here; it is not read from the config.
    this.DoFullBandwidth = false;
    this.MinFreqBand = config.GetIntNullable(ConfigKeys.Mfcc.Key_MinFreq);
    this.MaxFreqBand = config.GetIntNullable(ConfigKeys.Mfcc.Key_MaxFreq);

    // Midpoint of the configured band.
    this.MidFreqBand = this.MinFreqBand + ((this.MaxFreqBand - this.MinFreqBand) / 2);

    // SEGMENTATION PARAMETERS
    EndpointDetectionConfiguration.SetConfig(config);

    // MFCC PARAMETERS
    this.DoMelScale = config.GetBoolean(ConfigKeys.Mfcc.Key_DoMelScale);
    this.mfccConfig = new MfccConfiguration(config);

    // Frames between acoustic vectors.
    this.DeltaT = config.GetInt(ConfigKeys.Mfcc.Key_DeltaT);
}
/// <summary>
/// Renders a spectrogram image of an audio file by running the external SoX executable.
/// </summary>
/// <remarks>
/// The active command-line template hard-codes a monochrome (<c>-m</c>), 64-level (<c>-q 64</c>),
/// light-background (<c>-l</c>) spectrogram. Of the optional settings, only
/// <c>AnalysisKeys.SonogramTitle</c> reaches the command line. The comment, axes, colour and
/// quantisation settings are still parsed but the template has no placeholder for them.
/// </remarks>
/// <param name="fiAudio">Audio file handed to SoX as its input.</param>
/// <param name="configDict">Key/value settings controlling the optional SoX arguments.</param>
/// <param name="output">Image file SoX is asked to write via <c>-o</c>.</param>
/// <exception cref="FileNotFoundException">
/// Thrown when the executable named by <c>AppConfigHelper.SoxExe</c> does not exist.
/// </exception>
public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary<string, string> configDict, FileInfo output)
{
    var soxPath = new FileInfo(AppConfigHelper.SoxExe);
    if (!soxPath.Exists)
    {
        LoggedConsole.WriteLine("SOX ERROR: Path does not exist: <{0}>", soxPath.FullName);
        throw new FileNotFoundException("SOX ERROR: Path for executable does not exist.", soxPath.FullName);
    }

    // The executable path must be quoted because it may contain a space.
    string soxCmd = "\"" + AppConfigHelper.SoxExe + "\"";

    // Free text must be quoted, otherwise a multi-word title is split into several
    // command-line arguments and SoX misreads everything after the first word.
    string title = string.Empty;
    string titleText;
    if (configDict.TryGetValue(AnalysisKeys.SonogramTitle, out titleText))
    {
        title = " -t " + QuoteSoxText(titleText);
    }

    string comment = string.Empty;
    string commentText;
    if (configDict.TryGetValue(AnalysisKeys.SonogramComment, out commentText))
    {
        comment = " -c " + QuoteSoxText(commentText);
    }

    // Default is a raw spectrogram (-r); it is dropped only when axes are explicitly switched off.
    string axes = "-r";
    if (configDict.ContainsKey(AnalysisKeys.AddAxes) && !ConfigDictionary.GetBoolean(AnalysisKeys.AddAxes, configDict))
    {
        axes = string.Empty;
    }

    // Default is monochrome (-m); dropped when a coloured image is requested.
    string coloured = " -m ";
    if (configDict.ContainsKey(AnalysisKeys.SonogramColored) && ConfigDictionary.GetBoolean(AnalysisKeys.SonogramColored, configDict))
    {
        coloured = string.Empty;
    }

    // Default is 64 quantisation levels.
    string quantisation = " -q 64 ";
    if (configDict.ContainsKey(AnalysisKeys.SonogramQuantisation))
    {
        quantisation = " -q " + ConfigDictionary.GetInt(AnalysisKeys.SonogramQuantisation, configDict);
    }

    // FOR COMMAND LINE OPTIONS SEE: http://sox.sourceforge.net/sox.html
    //   -a       Suppress display of axis lines.
    //   -l       Light background (printer-friendly spectrogram).
    //   -m       Monochrome spectrogram (the default is colour).
    //   -q num   Number of intensity quantisation levels/colours.
    //   -r       Raw spectrogram: suppress the display of axes and legends.
    //   -t text  Image title, displayed above the spectrogram.
    //   -c text  Image comment, displayed below and to the left of the spectrogram.
    //
    // Example of a fully specified call:
    //   sox.exe -V "sourcefile.wav" -n rate 22050 spectrogram -m -r -l -a -q 249 -w hann -y 257 -X 43.06640625 -z 100 -o "imagefile.png"
    //
    // Active template: 64 grey levels with time, frequency and intensity scales.
    // Only placeholders {0} (audio path), {1} (title) and {6} (image path) are consumed.
    // A template that would honour every option is:
    //   " -V \"{0}\" -n spectrogram -l {1} {2} {3} {4} {5} -o \"{6}\""
    const string SoxCommandLineArguments = " -V \"{0}\" -n spectrogram -m {1} -q 64 -l -o \"{6}\"";

    var args = string.Format(SoxCommandLineArguments, fiAudio.FullName, title, comment, axes, coloured, quantisation, output.FullName);

    // The image's directory is passed as the second argument to Run
    // (presumably the working directory - confirm against ProcessRunner).
    using (var process = new ProcessRunner(soxCmd))
    {
        process.Run(args, output.DirectoryName);
    }
}

/// <summary>
/// Wraps free text in double quotes so that it is passed to SoX as one command-line argument.
/// Text that is already enclosed in double quotes is returned unchanged.
/// </summary>
/// <param name="text">Raw text taken from the configuration; may be null.</param>
/// <returns>The quoted text, with embedded double quotes escaped by a backslash.</returns>
private static string QuoteSoxText(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return "\"\"";
    }

    bool alreadyQuoted = text.Length >= 2 && text[0] == '"' && text[text.Length - 1] == '"';
    if (alreadyQuoted)
    {
        return text;
    }

    return "\"" + text.Replace("\"", "\\\"") + "\"";
}
/// <summary>
/// Builds a spectrogram image for an audio segment and saves it as a PNG.
/// </summary>
/// <remarks>
/// Only the un-annotated path is functional. When the config requests annotation
/// (<c>AnalysisKeys.AnnotateSonogram</c> is true) the method either logs a warning and returns
/// null (no analyser supplied) or throws, because the annotation code was intentionally disabled.
/// The disabled implementation built an <c>AnalysisSettings</c> object, called
/// <c>analyser.BeforeAnalyze</c> / <c>analyser.Analyze</c> and loaded the resulting image file.
/// </remarks>
/// <param name="fiAudio">Audio segment to render.</param>
/// <param name="fiConfig">Config file loaded into a <see cref="ConfigDictionary"/>.</param>
/// <param name="fiImage">Destination image file; an existing file is deleted before saving.</param>
/// <param name="analyser">Analyser used for annotated images; only checked for null.</param>
/// <returns>
/// The generated image; null when annotation is requested without an analyser,
/// or when the multi-track image yields no image.
/// </returns>
/// <exception cref="NotSupportedException">
/// Thrown when annotation is requested and an analyser is supplied.
/// </exception>
public static Image GetImageFromAudioSegment(FileInfo fiAudio, FileInfo fiConfig, FileInfo fiImage, IAnalyser2 analyser)
{
    // Read in the config file.
    var config = new ConfigDictionary(fiConfig.FullName);
    bool doAnnotate = config.GetBoolean(AnalysisKeys.AnnotateSonogram);

    if (doAnnotate)
    {
        if (analyser == null)
        {
            string analysisName = config.GetString(AnalysisKeys.AnalysisName);
            LoggedConsole.WriteLine("\nWARNING: Could not construct annotated image because analysis name not recognized:");
            LoggedConsole.WriteLine("\t " + analysisName);
            return null;
        }

        throw new NotSupportedException("Code intentionally broken because it is out of date and not used");
    }

    // Plain (un-annotated) spectrogram.
    var configDict = config.GetDictionary();
    BaseSonogram sonogram = Audio2DecibelSonogram(fiAudio, configDict);
    var mti = Sonogram2MultiTrackImage(sonogram, configDict);
    var image = mti.GetImage();

    if (image != null)
    {
        // Replace any previous image at the destination.
        if (fiImage.Exists)
        {
            fiImage.Delete();
        }

        image.Save(fiImage.FullName, ImageFormat.Png);
    }

    return image;
}
/// <summary>
/// Wraps a sonogram's image in an <see cref="Image_MultiTrack"/> and appends the optional
/// tracks requested in the configuration.
/// </summary>
/// <remarks>
/// Earlier versions also supported time/frequency reduction and noise reduction driven by
/// the config; both steps are disabled and the sonogram data is used as supplied.
/// </remarks>
/// <param name="sonogram">The sonogram to draw.</param>
/// <param name="configDict">
/// Settings consulted for <c>AnalysisKeys.AddTimeScale</c>, <c>AnalysisKeys.AddAxes</c>
/// and <c>AnalysisKeys.AddSegmentationTrack</c>.
/// </param>
/// <returns>The multi-track image, with a time track and/or segmentation track when requested.</returns>
public static Image_MultiTrack Sonogram2MultiTrackImage(BaseSonogram sonogram, Dictionary<string, string> configDict)
{
    // Sub-band highlighting is never requested from here.
    const bool HighlightSubband = false;

    // Scales are drawn when AddTimeScale says so; AddAxes is consulted only when
    // AddTimeScale is absent from the dictionary.
    bool showScales = configDict.ContainsKey(AnalysisKeys.AddTimeScale)
        ? ConfigDictionary.GetBoolean(AnalysisKeys.AddTimeScale, configDict)
        : configDict.ContainsKey(AnalysisKeys.AddAxes) && ConfigDictionary.GetBoolean(AnalysisKeys.AddAxes, configDict);

    Image spectrogramImage = sonogram.GetImage(HighlightSubband, add1KHzLines: showScales, doMelScale: false);
    Image_MultiTrack multiTrack = new Image_MultiTrack(spectrogramImage);

    // Time scale track.
    if (showScales)
    {
        multiTrack.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
    }

    // Segmentation track, off unless explicitly enabled.
    if (configDict.ContainsKey(AnalysisKeys.AddSegmentationTrack)
        && ConfigDictionary.GetBoolean(AnalysisKeys.AddSegmentationTrack, configDict))
    {
        multiTrack.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
    }

    return multiTrack;
}