Exemple #1
0
 /// <summary>
 /// Builds the MFCC settings by reading each value from the supplied configuration.
 /// </summary>
 /// <param name="config">Configuration source queried for the MFCC keys.</param>
 public MfccConfiguration(ConfigDictionary config)
 {
     // Filterbank set-up.
     this.FilterbankCount = config.GetInt(ConfigKeys.Mfcc.Key_FilterbankCount);
     this.DoMelScale = config.GetBoolean(ConfigKeys.Mfcc.Key_DoMelScale);

     // Number of cepstral coefficients.
     this.CcCount = config.GetInt(ConfigKeys.Mfcc.Key_CcCount);

     // Optional delta and double-delta features.
     this.IncludeDelta = config.GetBoolean(ConfigKeys.Mfcc.Key_IncludeDelta);
     this.IncludeDoubleDelta = config.GetBoolean(ConfigKeys.Mfcc.Key_IncludeDoubleDelta);
 }
Exemple #2
0
        /// <summary>
        /// Intended as a wrapper around the analyser.Analyze(analysisSettings) method,
        /// to be called as an executable with command line arguments.
        /// At present it only prepares the requested audio segment and then throws,
        /// because the analysis step was broken by earlier code updates.
        /// </summary>
        /// <param name="arguments">Command line arguments; must not be null.</param>
        /// <exception cref="NotImplementedException">Always thrown once the audio segment has been prepared.</exception>
        public static void Execute(Arguments arguments)
        {
            Contract.Requires(arguments != null);

            var (analysisSettings, segmentSettings) = arguments.ToAnalysisSettings();
            TimeSpan offsetStart = TimeSpan.FromSeconds(arguments.Start ?? 0);
            TimeSpan duration = TimeSpan.FromSeconds(arguments.Duration ?? 0);
            int resampleRate = ConfigDictionary.GetInt(AnalysisKeys.ResampleRate, analysisSettings.ConfigDict);

            // Remove any segment file that already exists at the target location.
            FileInfo segmentFile = segmentSettings.SegmentAudioFile;
            if (segmentFile.Exists)
            {
                segmentFile.Delete();
            }

            // A zero duration means "process the entire file": only the sample rate is requested.
            // Otherwise the request is limited to [offsetStart, offsetStart + duration].
            AudioUtilityRequest request = duration == TimeSpan.Zero
                ? new AudioUtilityRequest { TargetSampleRate = resampleRate }
                : new AudioUtilityRequest
                {
                    TargetSampleRate = resampleRate,
                    OffsetStart = offsetStart,
                    OffsetEnd = offsetStart.Add(duration),
                };

            AudioFilePreparer.PrepareFile(arguments.Source, segmentFile, request, analysisSettings.AnalysisTempDirectoryFallback);

            // BROKEN: the analysis itself is not available. The statements that used to follow this
            // throw (a call on a null IAnalyser2 and the writing of a summary-indices CSV) could never
            // execute and have been removed.
            throw new NotImplementedException("Broken in code updates");
        }
        /// <summary>
        /// Populates this instance from a <see cref="ConfigDictionary"/>.
        /// DoSnr is always set to true and DoFullBandwidth is always set to false.
        /// </summary>
        /// <param name="config">Configuration values read from file.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> is null.</exception>
        private void Initialize(ConfigDictionary config)
        {
            if (config == null)
            {
                throw new ArgumentNullException(nameof(config));
            }

            // Recording identity.
            this.CallName = config.GetString(ConfigKeys.Recording.Key_RecordingCallName);
            this.SourceFName = config.GetString(ConfigKeys.Recording.Key_RecordingFileName);

            // The duration is optional: the property is only assigned when a value is returned.
            var wavSeconds = config.GetDoubleNullable("WAV_DURATION");
            if (wavSeconds != null)
            {
                this.Duration = TimeSpan.FromSeconds(wavSeconds.Value);
            }

            // Framing parameters.
            this.WindowSize = config.GetInt(ConfigKeys.Windowing.Key_WindowSize);
            this.WindowOverlap = config.GetDouble(ConfigKeys.Windowing.Key_WindowOverlap);

            // Noise reduction parameters. The configured text is parsed as-is (case-sensitive).
            this.DoSnr = true;
            string noiseReductionName = config.GetString(AnalysisKeys.NoiseReductionType);
            object parsedNoiseReduction = Enum.Parse(typeof(NoiseReductionType), noiseReductionName);
            this.NoiseReductionType = (NoiseReductionType)parsedNoiseReduction;

            // Frequency band parameters. The mid band is the midpoint of the min and max bands.
            this.DoFullBandwidth = false;
            this.MinFreqBand = config.GetIntNullable(ConfigKeys.Mfcc.Key_MinFreq);
            this.MaxFreqBand = config.GetIntNullable(ConfigKeys.Mfcc.Key_MaxFreq);
            this.MidFreqBand = this.MinFreqBand + ((this.MaxFreqBand - this.MinFreqBand) / 2);

            // Segmentation parameters.
            EndpointDetectionConfiguration.SetConfig(config);

            // MFCC parameters. DeltaT is the number of frames between acoustic vectors.
            this.DoMelScale = config.GetBoolean(ConfigKeys.Mfcc.Key_DoMelScale);
            this.mfccConfig = new MfccConfiguration(config);
            this.DeltaT = config.GetInt(ConfigKeys.Mfcc.Key_DeltaT);
        }
        /// <summary>
        /// Populates this instance from a plain key/value dictionary, using default values
        /// for the optional keys that are absent.
        /// DoSnr = true;
        /// DoFullBandwidth = true;
        /// </summary>
        /// <remarks>
        /// Duration, the frequency band limits, the endpoint detection settings and the MFCC
        /// parameters are not initialised by this overload.
        /// </remarks>
        /// <param name="configDict">Dictionary of config values</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="configDict"/> is null.</exception>
        /// <exception cref="KeyNotFoundException">Thrown when the recording call name or file name key is missing.</exception>
        private void Initialize(Dictionary <string, string> configDict)
        {
            if (configDict == null)
            {
                throw new ArgumentNullException(nameof(configDict));
            }

            // The two recording keys are mandatory: the indexer throws when either is missing.
            this.CallName = configDict[ConfigKeys.Recording.Key_RecordingCallName];
            this.SourceFName = configDict[ConfigKeys.Recording.Key_RecordingFileName];

            // FRAMING PARAMETERS (defaults: window of 512, no overlap)
            this.WindowSize = configDict.ContainsKey(AnalysisKeys.FrameLength)
                ? ConfigDictionary.GetInt(AnalysisKeys.FrameLength, configDict)
                : 512;

            this.WindowOverlap = configDict.ContainsKey(AnalysisKeys.FrameOverlap)
                ? ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, configDict)
                : 0.0;

            // The same key constant is used for the presence check and the read, so the two
            // cannot drift apart (the read previously used a hard-coded "ResampleRate" literal).
            this.sampleRate = configDict.ContainsKey(AnalysisKeys.ResampleRate)
                ? ConfigDictionary.GetInt(AnalysisKeys.ResampleRate, configDict)
                : 0;

            // NOISE REDUCTION PARAMETERS (default: none). The configured text is parsed as-is.
            this.DoSnr = true;
            this.NoiseReductionType = NoiseReductionType.None;
            if (configDict.ContainsKey(AnalysisKeys.NoiseReductionType))
            {
                string noiseReductionType = configDict[AnalysisKeys.NoiseReductionType];
                this.NoiseReductionType = (NoiseReductionType)Enum.Parse(typeof(NoiseReductionType), noiseReductionType);
            }

            // FREQ BAND PARAMETERS
            this.DoFullBandwidth = true;
        }
        /// <summary>
        /// Creates a <see cref="SonogramConfig"/> from a configuration file, or a default
        /// instance when the file does not exist.
        /// </summary>
        /// <param name="configFile">Path of the configuration file.</param>
        /// <returns>The configuration built from the file, or one with default parameter values.</returns>
        public static SonogramConfig Load(string configFile)
        {
            Log.WriteLine("config file =" + configFile);

            // Missing file: report it and fall back to the default parameter values.
            if (!File.Exists(configFile))
            {
                Log.WriteLine("The configuration file <" + configFile + "> does not exist!");
                Log.WriteLine("Initialising application with default parameter values.");
                return new SonogramConfig();
            }

            var settings = new ConfigDictionary(configFile);

            // A positive VERBOSITY value in the file switches verbose logging on.
            bool verbosityRequested = settings.GetInt("VERBOSITY") > 0;
            if (verbosityRequested)
            {
                Log.Verbosity = 1;
                Log.WriteIfVerbose("Verbosity set true in Application Config file.");
            }

            return new SonogramConfig(settings);
        }
        /// <summary>
        /// Runs the kiwi detector over one audio segment, once for the male and once for the
        /// female frequency band, and merges the two sets of results.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">Audio segment file to analyse.</param>
        /// <param name="config">Analysis parameters, read through the LSKiwi1 and Keys key constants.</param>
        /// <returns>
        /// A tuple of the sonogram, the summed hits matrix, the combined score arrays, the combined
        /// events (named "LSK(m)" or "LSK(f)") and the recording duration.
        /// </returns>
        Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> config)
        {
            // Frequency bands searched for male and female calls.
            int minHzMale = ConfigDictionary.GetInt(LSKiwi1.key_MIN_HZ_MALE, config);
            int maxHzMale = ConfigDictionary.GetInt(LSKiwi1.key_MAX_HZ_MALE, config);
            int minHzFemale = ConfigDictionary.GetInt(LSKiwi1.key_MIN_HZ_FEMALE, config);
            int maxHzFemale = ConfigDictionary.GetInt(LSKiwi1.key_MAX_HZ_FEMALE, config);

            // Framing parameters for the sonogram.
            int frameLength = ConfigDictionary.GetInt(LSKiwi1.key_FRAME_LENGTH, config);
            double frameOverlap = ConfigDictionary.GetDouble(LSKiwi1.key_FRAME_OVERLAP, config);

            // Periodicity bounds, score threshold, and the minimum/maximum event duration
            // for an event to qualify as a species call.
            double minPeriod = ConfigDictionary.GetDouble(LSKiwi1.key_MIN_PERIODICITY, config);
            double maxPeriod = ConfigDictionary.GetDouble(LSKiwi1.key_MAX_PERIODICITY, config);
            double eventThreshold = ConfigDictionary.GetDouble(Keys.EVENT_THRESHOLD, config);
            double minDuration = ConfigDictionary.GetDouble(LSKiwi1.key_MIN_DURATION, config);
            double maxDuration = ConfigDictionary.GetDouble(LSKiwi1.key_MAX_DURATION, config);

            // A constructor call never yields null, so the recording is used without a null check.
            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);
            TimeSpan tsRecordingtDuration = recording.Duration();

            // i: MAKE SONOGRAM - start from the default config and override what this analysis needs.
            var sonoConfig = new SonogramConfig
            {
                SourceFName = recording.FileName,
                WindowSize = frameLength,
                WindowOverlap = frameOverlap,
                NoiseReductionType = NoiseReductionType.STANDARD, // noise removal is mandatory here
            };
            BaseSonogram sonogram = new SpectralSonogram(sonoConfig, recording.GetWavReader());

            // DETECT MALE KIWI
            var resultsMale = DetectKiwi(sonogram, minHzMale, maxHzMale, minPeriod, maxPeriod, eventThreshold, minDuration, maxDuration);
            var scoresM = resultsMale.Item1;
            var hitsM = resultsMale.Item2;
            var predictedEventsM = resultsMale.Item3;
            foreach (AcousticEvent ev in predictedEventsM)
            {
                ev.Name = "LSK(m)";
            }

            // DETECT FEMALE KIWI
            var resultsFemale = DetectKiwi(sonogram, minHzFemale, maxHzFemale, minPeriod, maxPeriod, eventThreshold, minDuration, maxDuration);
            var scoresF = resultsFemale.Item1;
            var hitsF = resultsFemale.Item2;
            var predictedEventsF = resultsFemale.Item3;
            foreach (AcousticEvent ev in predictedEventsF)
            {
                ev.Name = "LSK(f)";
            }

            // Combine the results: the female hits, events and scores are folded into the male collections.
            hitsM = MatrixTools.AddMatrices(hitsM, hitsF);
            foreach (AcousticEvent ev in predictedEventsF)
            {
                predictedEventsM.Add(ev);
            }

            foreach (double[] array in scoresF)
            {
                scoresM.Add(array);
            }

            return System.Tuple.Create(sonogram, hitsM, scoresM, predictedEventsM, tsRecordingtDuration);
        } // Analysis()
Exemple #7
0
        /// <summary>
        /// Reads the analysis parameters, extracts the envelope and amplitude spectrogram of one
        /// audio file, and returns the indices produced by RainIndices.GetIndices.
        /// </summary>
        /// <param name="fiAudioFile">Audio file to analyse.</param>
        /// <param name="analysisSettings">Settings whose ConfigDict supplies the optional frame and frequency-bound values.</param>
        /// <param name="originalFile">Metadata of the original file; only its SampleRate is read here.</param>
        /// <returns>A tuple of the indices dictionary and the duration of the audio.</returns>
        public static Tuple <Dictionary <string, double>, TimeSpan> RainAnalyser(FileInfo fiAudioFile, AnalysisSettings analysisSettings, SourceMetadata originalFile)
        {
            Dictionary <string, string> config = analysisSettings.ConfigDict;

            // get parameters for the analysis
            // Each of these defaults is replaced below when the matching key is present in the config.
            int    frameSize     = IndexCalculateConfig.DefaultWindowSize;
            double windowOverlap = 0.0;
            int    lowFreqBound  = 1000;
            int    midFreqBound  = 8000;

            if (config.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameSize = ConfigDictionary.GetInt(AnalysisKeys.FrameLength, config);
            }
            if (config.ContainsKey(key_LOW_FREQ_BOUND))
            {
                lowFreqBound = ConfigDictionary.GetInt(key_LOW_FREQ_BOUND, config);
            }
            if (config.ContainsKey(key_MID_FREQ_BOUND))
            {
                midFreqBound = ConfigDictionary.GetInt(key_MID_FREQ_BOUND, config);
            }
            if (config.ContainsKey(AnalysisKeys.FrameOverlap))
            {
                windowOverlap = ConfigDictionary.GetDouble(AnalysisKeys.FrameOverlap, config);
            }

            // get recording segment
            AudioRecording recording = new AudioRecording(fiAudioFile.FullName);

            // calculate duration/size of various quantities.
            // NOTE(review): signalLength is never read in this method.
            int      signalLength  = recording.WavReader.Samples.Length;
            TimeSpan audioDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);

            // Seconds advanced per frame: the non-overlapping part of a frame divided by the sample rate.
            // NOTE(review): assumes windowOverlap is a fraction in [0, 1) - confirm.
            double   duration      = frameSize * (1 - windowOverlap) / (double)recording.SampleRate;
            TimeSpan frameDuration = TimeSpan.FromTicks((long)(duration * TimeSpan.TicksPerSecond));

            // NOTE(review): the chunk values and the classifications array below are computed but
            // never read in this method.
            int    chunkDuration   = 10; //seconds
            double framesPerSecond = 1 / frameDuration.TotalSeconds;
            int    chunkCount      = (int)Math.Round(audioDuration.TotalSeconds / (double)chunkDuration);
            int    framesPerChunk  = (int)(chunkDuration * framesPerSecond);

            string[] classifications = new string[chunkCount];

            //i: EXTRACT ENVELOPE and FFTs
            double epsilon       = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    signalextract = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, epsilon, frameSize, windowOverlap);

            double[] envelope = signalextract.Envelope;
            double[,] spectrogram = signalextract.AmplitudeSpectrogram;  //amplitude spectrogram
            // NOTE(review): colCount is never read in this method.
            int colCount = spectrogram.GetLength(1);

            // binWidth is derived from the recording's Nyquist, before the up-sampling adjustment below.
            int    nyquistFreq = recording.Nyquist;
            int    nyquistBin  = spectrogram.GetLength(1) - 1;
            double binWidth    = nyquistFreq / (double)spectrogram.GetLength(1);

            // calculate the bin id of boundary between mid and low frequency spectrum
            int lowBinBound = (int)Math.Ceiling(lowFreqBound / binWidth);

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. This will be less than 17640/2.
            int originalAudioNyquist = originalFile.SampleRate / 2; // original sample rate can be anything 11.0-44.1 kHz.

            if (recording.Nyquist > originalAudioNyquist)
            {
                // NOTE(review): nyquistFreq is reassigned here but not read again afterwards; only nyquistBin is used.
                nyquistFreq = originalAudioNyquist;
                nyquistBin  = (int)Math.Floor(originalAudioNyquist / binWidth);
            }

            // vi: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            // Restricted to the bins from lowBinBound up to nyquistBin.
            // NOTE(review): aci1 is never read, so the ACI is not part of the returned result.
            var subBandSpectrogram = MatrixTools.Submatrix(spectrogram, 0, lowBinBound, spectrogram.GetLength(0) - 1, nyquistBin);

            double[] aciArray = AcousticComplexityIndex.CalculateACI(subBandSpectrogram);
            double   aci1     = aciArray.Average();

            // ii: FRAME ENERGIES -
            // convert signal to decibels and subtract background noise.
            // NOTE(review): dBarray is never read in this method.
            double StandardDeviationCount = 0.1; // number of noise SDs to calculate noise threshold - determines severity of noise reduction
            var    results3 = SNR.SubtractBackgroundNoiseFromWaveform_dB(SNR.Signal2Decibels(signalextract.Envelope), StandardDeviationCount);
            var    dBarray  = SNR.TruncateNegativeValues2Zero(results3.NoiseReducedSignal);

            //// vii: remove background noise from the full spectrogram i.e. BIN 1 to Nyquist
            //spectrogramData = MatrixTools.Submatrix(spectrogramData, 0, 1, spectrogramData.GetLength(0) - 1, nyquistBin);
            //const double SpectralBgThreshold = 0.015; // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
            //double[] modalValues = SNR.CalculateModalValues(spectrogramData); // calculate modal value for each freq bin.
            //modalValues = DataTools.filterMovingAverage(modalValues, 7);      // smooth the modal profile
            //spectrogramData = SNR.SubtractBgNoiseFromSpectrogramAndTruncate(spectrogramData, modalValues);
            //spectrogramData = SNR.RemoveNeighbourhoodBackgroundNoise(spectrogramData, SpectralBgThreshold);

            //set up the output
            // NOTE(review): both header format strings below have 12 placeholders ({0}..{11}) but are
            // given 14 column names, so "index1" and "index2" do not appear in the formatted header.
            if (Verbose)
            {
                LoggedConsole.WriteLine("{0:d2}, {1},  {2},    {3},    {4},    {5},   {6},     {7},     {8},    {9},   {10},   {11}", "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
            }
            // NOTE(review): sb is filled with the header when WriteOutputFile is set, but it is never
            // read or written anywhere in this method.
            StringBuilder sb = null;

            if (WriteOutputFile)
            {
                string header = string.Format("{0:d2},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11}", "start", "end", "avDB", "BG", "SNR", "act", "spik", "lf", "mf", "hf", "H[t]", "H[s]", "index1", "index2");
                sb = new StringBuilder(header + "\n");
            }

            // The returned indices are computed from the envelope and the full amplitude spectrogram.
            Dictionary <string, double> dict = RainIndices.GetIndices(envelope, audioDuration, frameDuration, spectrogram, lowFreqBound, midFreqBound, binWidth);

            return(Tuple.Create(dict, audioDuration));
        } // RainAnalyser()
Exemple #8
0
        /// <summary>
        /// Produces a spectrogram image of an audio file by running the external SoX executable.
        /// </summary>
        /// <param name="fiAudio">Audio file passed to SoX as its input.</param>
        /// <param name="configDict">Sonogram settings; the title, comment, axes, colour and quantisation keys are optional.</param>
        /// <param name="output">Image file SoX is asked to write; its directory is passed to the process runner.</param>
        /// <exception cref="FileNotFoundException">Thrown when the configured SoX executable does not exist.</exception>
        public static void MakeSonogramWithSox(FileInfo fiAudio, Dictionary <string, string> configDict, FileInfo output)
        {
            var soxExecutable = new FileInfo(AppConfigHelper.SoxExe);
            if (!soxExecutable.Exists)
            {
                LoggedConsole.WriteLine("SOX ERROR: Path does not exist: <{0}>", soxExecutable.FullName);
                throw new FileNotFoundException("SOX ERROR: Path for executable does not exist.", soxExecutable.FullName);
            }

            // The executable path is wrapped in quotes because it can contain a space.
            string quotedSoxPath = "\"" + AppConfigHelper.SoxExe + "\"";

            // -t text: image title, displayed above the spectrogram.
            string titleOption = configDict.ContainsKey(AnalysisKeys.SonogramTitle)
                ? " -t " + configDict[AnalysisKeys.SonogramTitle]
                : string.Empty;

            // -c text: image comment, displayed below and to the left of the spectrogram.
            string commentOption = configDict.ContainsKey(AnalysisKeys.SonogramComment)
                ? " -c " + configDict[AnalysisKeys.SonogramComment]
                : string.Empty;

            // -r: raw spectrogram without axes and legends; dropped only when axes are explicitly disabled in the config.
            bool axesDisabled = configDict.ContainsKey(AnalysisKeys.AddAxes) && !ConfigDictionary.GetBoolean(AnalysisKeys.AddAxes, configDict);
            string axesOption = axesDisabled ? string.Empty : "-r";

            // -m: monochrome spectrogram; dropped when a coloured sonogram is requested.
            bool colourRequested = configDict.ContainsKey(AnalysisKeys.SonogramColored) && ConfigDictionary.GetBoolean(AnalysisKeys.SonogramColored, configDict);
            string colourOption = colourRequested ? string.Empty : " -m ";

            // -q: number of intensity quantisation levels; 64 unless configured.
            string quantisationOption = configDict.ContainsKey(AnalysisKeys.SonogramQuantisation)
                ? " -q " + ConfigDictionary.GetInt(AnalysisKeys.SonogramQuantisation, configDict)
                : " -q 64 ";

            // Only placeholders {0} (input file), {1} (title) and {6} (output file) appear in this
            // template: the comment, axes, colour and quantisation options are passed to
            // string.Format but are not part of the resulting command line, which always uses -m and -q 64.
            // For the SoX command line options see: http://sox.sourceforge.net/sox.html
            const string ArgumentTemplate = " -V \"{0}\" -n spectrogram -m {1} -q 64 -l -o \"{6}\"";

            string soxArguments = string.Format(
                ArgumentTemplate,
                fiAudio.FullName,
                titleOption,
                commentOption,
                axesOption,
                colourOption,
                quantisationOption,
                output.FullName);

            using (var runner = new ProcessRunner(quotedSoxPath))
            {
                runner.Run(soxArguments, output.DirectoryName);
            }
        }