// OTHER CONSTANTS
        //private const string ImageViewer = @"C:\Windows\system32\mspaint.exe";

        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            var recognizerConfig = new LitoriaWatjulumConfig();

            recognizerConfig.ReadConfigFile(configuration);

            //int maxOscilRate = (int)Math.Ceiling(1 / lwConfig.MinPeriod);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            TimeSpan recordingDuration = recording.WavReader.Time;

            // this default framesize seems to work
            const int frameSize     = 128;
            double    windowOverlap = 0.0;

            // calculate the overlap instead
            //double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
            //    recording.SampleRate,
            //    frameSize,
            //    maxOscilRate);

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,

                //set default values - ignore those set by user
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };

            //#############################################################################################################################################
            //DO THE ANALYSIS
            var results = Analysis(recording, sonoConfig, recognizerConfig, MainEntry.InDEBUG, segmentStartOffset);

            //######################################################################

            if (results == null)
            {
                return(null); //nothing to process
            }

            var sonogram        = results.Item1;
            var hits            = results.Item2;
            var scoreArray      = results.Item3;
            var predictedEvents = results.Item4;
            var debugImage      = results.Item5;

            // old way of creating a path:
            //var debugPath = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.FileName), SpeciesName, "png", "DebugSpectrogram"));
            var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");

            debugImage.Save(debugPath);

            //#############################################################################################################################################

            // Prune events here if required i.e. remove those below threshold score if this not already done. See other recognizers.
            foreach (var ae in predictedEvents)
            {
                // add additional info
                ae.Name                   = recognizerConfig.AbbreviatedSpeciesName;
                ae.SpeciesName            = recognizerConfig.SpeciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
            }

            // do a recognizer TEST.
            if (false)
            {
                var testDir = new DirectoryInfo(outputDirectory.Parent.Parent.FullName);
                TestTools.RecognizerScoresTest(recording.BaseName, testDir, recognizerConfig.AnalysisName, scoreArray);
                AcousticEvent.TestToCompareEvents(recording.BaseName, testDir, recognizerConfig.AnalysisName, predictedEvents);
            }

            var plot = new Plot(this.DisplayName, scoreArray, recognizerConfig.EventThreshold);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = predictedEvents,
            });
        }
示例#2
0
        /// <summary>
        /// The CORE ANALYSIS METHOD.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values -
            int frameLength = 1024;

            if (configDict.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
            }

            double windowOverlap              = 0.0;
            int    minHz                      = int.Parse(configDict["MIN_HZ"]);
            int    minFormantgap              = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int    maxFormantgap              = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double decibelThreshold           = double.Parse(configDict["DECIBEL_THRESHOLD"]);   //dB
            double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double callDuration               = double.Parse(configDict["CALL_DURATION"]);       // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameLength,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            }; //default values config

            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = freqBinWidth;

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int numberOfBins = 64;
            int minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin       = minBin + numberOfBins - 1;
            int maxHz        = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            int callSpan = (int)Math.Round(callDuration * framesPerSecond);

            //#############################################################################################################################################
            //ii: DETECT HARMONICS
            var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);

            double[] dBArray     = results.Item1;
            double[] intensity   = results.Item2;   //an array of periodicity scores
            double[] periodicity = results.Item3;

            //intensity = DataTools.filterMovingAverage(intensity, 3);
            int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise

            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                if (intensity[r] < harmonicIntensityThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find freq having max power and use info to adjust score.
                //expect humans to have max < 1000 Hz
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < noiseBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                int    maxIndex         = DataTools.GetMaxIndex(spectrum);
                int    freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
                double discount         = 1.0;
                if (freqWithMaxPower < 1200)
                {
                    discount = 0.0;
                }

                if (intensity[r] > harmonicIntensityThreshold)
                {
                    scoreArray[r] = intensity[r] * discount;
                }
            }

            //transfer info to a hits matrix.
            var    hits      = new double[rowCount, colCount];
            double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                double herzPeriod = periodicity[r] * freqBinWidth;
                for (int c = minBin; c < maxbin; c++)
                {
                    //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
                    hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap;  //to get a relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxPossibleScore = 0.5;
            int    halfCallSpan     = callSpan / 2;
            var    predictedEvents  = new List <AcousticEvent>();

            for (int i = 0; i < rowCount; i++)
            {
                //assume one score position per crow call
                if (scoreArray[i] < 0.001)
                {
                    continue;
                }

                double        startTime = (i - halfCallSpan) / framesPerSecond;
                AcousticEvent ev        = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
                ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
                ev.Score           = scoreArray[i];
                ev.ScoreNormalised = ev.Score / maxPossibleScore; // normalised to the user supplied threshold

                //ev.Score_MaxPossible = maxPossibleScore;
                predictedEvents.Add(ev);
            } //for loop

            Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
        } //Analysis()
示例#3
0
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], List <Plot>, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - ignore those set by user
            int    frameSize     = 128;
            double windowOverlap = 0.5;

            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double minDuration        = double.Parse(configDict["MIN_DURATION"]);        // seconds
            double maxDuration        = double.Parse(configDict["MAX_DURATION"]);        // seconds
            double minPeriod          = double.Parse(configDict["MIN_PERIOD"]);          // seconds
            double maxPeriod          = double.Parse(configDict["MAX_PERIOD"]);          // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameSize,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            }; //default values config

            //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   frameOffset     = sonoConfig.GetFrameOffset(sr);
            double   framesPerSecond = 1 / frameOffset;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 256      17640       14.5ms          68.9        68.9    ms          hz          hz
            // 512      17640       29.0ms          34.4        34.4    ms          hz          hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //The Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            // Assuming sr=17640 and window=256, then binWidth = 68.9Hz and 1500Hz = bin 21.7..
            // Therefore do a Xcorrelation between bins 21 and 22.
            // Number of frames to span must power of 2. Try 16 frames which covers 232ms - almost 1/4 second.

            int midHz    = 1500;
            int lowerBin = (int)(midHz / freqBinWidth) + 1;  //because bin[0] = DC
            int upperBin = lowerBin + 4;
            int lowerHz  = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
            int upperHz  = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
            double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
            lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB #######################################################################
            upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB #######################################################################

            int step         = (int)(framesPerSecond / 40);               //take one/tenth second steps
            int stepCount    = rowCount / step;
            int sampleLength = 32;                                        //16 frames = 232ms - almost 1/4 second.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES

            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if (lowerSubarray == null || upperSubarray == null)
                {
                    break;
                }

                if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
                {
                    break;
                }

                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 2;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if (period < minPeriod || period > maxPeriod)
                {
                    continue;
                }

                // lay down score for sample length
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }

                    periodicity[start + j] = period;
                }
            }

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 3);
            intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5 #######################################################################

            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerHz,
                upperHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray, segmentStartOffset);
            var hits = new double[rowCount, colCount];

            var plots = new List <Plot>();

            //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
            //plots.Add(new Plot("lowerArray", DataTools.Normalise(lowerArray, 0, 100), 10.0));
            //plots.Add(new Plot("lowerArray", DataTools.NormaliseMatrixValues(lowerArray), 0.25));
            //plots.Add(new Plot("upperArray", DataTools.NormaliseMatrixValues(upperArray), 0.25));
            //plots.Add(new Plot("intensity",  DataTools.NormaliseMatrixValues(intensity), intensityThreshold));
            plots.Add(new Plot("intensity", intensity, intensityThreshold));

            return(Tuple.Create(sonogram, hits, plots, predictedEvents, tsRecordingtDuration));
        } //Analysis()
        ///  <summary>
        ///  ################ THE KEY ANALYSIS METHOD for TRILLS
        ///
        ///  See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
        ///  </summary>
        /// <param name="recording"></param>
        /// <param name="sonoConfig"></param>
        /// <param name="lwConfig"></param>
        /// <param name="returnDebugImage"></param>
        /// <param name="segmentStartOffset"></param>
        /// <returns></returns>
        private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaWatjulumConfig lwConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            double intensityThreshold = lwConfig.IntensityThreshold;
            double minDuration        = lwConfig.MinDurationOfTrill; // seconds
            double maxDuration        = lwConfig.MaxDurationOfTrill; // seconds
            double minPeriod          = lwConfig.MinPeriod;          // seconds
            double maxPeriod          = lwConfig.MaxPeriod;          // seconds

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 4 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            //int colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

            // Could smooth here rather than above. Above seemed slightly better?
            //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
            var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                lwConfig.DecibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // LOOK FOR TRILL EVENTS
            // init the score array
            double[] scores = new double[rowCount];

            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedTrillEvents)
            {
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    double   oscilFreq;
                    double   period;
                    double   intensity;
                    Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

                    bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
                    ae.Score_MaxInEvent = maximumIntensity;
                    ae.Profile          = lwConfig.ProfileNames[0];
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // LOOK FOR TINK EVENTS
            // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            double minDurationOfTink = lwConfig.MinDurationOfTink;  // seconds
            double maxDurationOfTink = lwConfig.MaxDurationOfTink;  // seconds

            // want stronger threshold for tink because brief.
            double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
            var    predictedTinkEvents  = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                tinkDecibelThreshold,
                minDurationOfTink,
                maxDurationOfTink,
                segmentStartOffset);

            foreach (var ae2 in predictedTinkEvents)
            {
                // Prune the list of potential acoustic events, for example using Cosine Similarity.

                //rowtop,  rowWidth
                //int eventStart = ae2.Oblong.RowTop;
                //int eventWidth = ae2.Oblong.RowWidth;
                //int step = 2;
                //double maximumIntensity = 0.0;

                // add abbreviatedSpeciesName into event
                //if (maximumIntensity >= intensityThreshold)
                //{
                ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

                //ae2.Score_MaxInEvent = maximumIntensity;
                ae2.Profile = lwConfig.ProfileNames[1];
                confirmedEvents.Add(ae2);

                //}
            }

            //######################################################################

            var   scorePlot  = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;
                DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };
                debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
示例#5
0
        public static void Main(Arguments arguments)
        {
            //1. set up the necessary files
            //DirectoryInfo diSource = arguments.Source.Directory;
            FileInfo fiSourceRecording = arguments.Source;
            FileInfo fiConfig          = arguments.Config.ToFileInfo();
            FileInfo fiImage           = arguments.Output.ToFileInfo();

            fiImage.CreateParentDirectories();

            string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING";
            string date  = "# DATE AND TIME: " + DateTime.Now;

            LoggedConsole.WriteLine(title);
            LoggedConsole.WriteLine(date);
            LoggedConsole.WriteLine("# Input  audio file: " + fiSourceRecording.Name);
            LoggedConsole.WriteLine("# Output image file: " + fiImage);

            //2. get the config dictionary
            Config configuration = ConfigFile.Deserialize(fiConfig);

            //below three lines are examples of retrieving info from Config config
            //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName];
            //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false;
            //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold;

            //3 transfer conogram parameters to a dictionary to be passed around
            var configDict = new Dictionary <string, string>();

            // #Resample rate must be 2 X the desired Nyquist. Default is that of recording.
            configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString();
            configDict["FrameLength"]  = configuration[AnalysisKeys.FrameLength] ?? "512";
            int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512;

            // #Frame Overlap as fraction: default=0.0
            configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0";
            double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0;

            // #MinHz: 500
            // #MaxHz: 3500
            // #NOISE REDUCTION PARAMETERS
            configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true";
            configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? "3.0";

            configDict["ADD_AXES"]             = configuration["ADD_AXES"] ?? "true";
            configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true";

            // 3: GET RECORDING
            var startOffsetMins = TimeSpan.Zero;
            var endOffsetMins   = TimeSpan.Zero;

            FileInfo fiOutputSegment = fiSourceRecording;

            if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero))
            {
                var buffer = new TimeSpan(0, 0, 0);
                fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav"));

                //This method extracts segment and saves to disk at the location fiOutputSegment.
                var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? AppConfigHelper.DefaultTargetSampleRate;
                AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment);
            }

            var recording = new AudioRecording(fiOutputSegment.FullName);

            // EXTRACT ENVELOPE and SPECTROGRAM
            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap);

            // average absolute value over the minute recording
            ////double[] avAbsolute = dspOutput.Average;

            // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ##################################
            // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            // double totalSeconds = wavDuration.TotalSeconds;

            // double[] signalEnvelope = dspOutput.Envelope;
            // double avSignalEnvelope = signalEnvelope.Average();
            // double[] frameEnergy = dspOutput.FrameEnergy;
            // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds;
            // double binWidth = dspOutput.BinWidth;
            // int nyquistBin = dspOutput.NyquistBin;
            // dspOutput.WindowPower,
            // dspOutput.FreqBinWidth
            int    nyquistFreq = dspOutput.NyquistFreq;
            double epsilon     = recording.Epsilon;

            // i: prepare amplitude spectrogram
            double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.
            var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData));

            // ii: prepare decibel spectrogram prior to noise removal
            double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon);
            decibelSpectrogramdata           = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata);
            var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

            // iii: Calculate background noise spectrum in decibels
            // Calculate noise value for each freq bin.
            double sdCount        = 0.0; // number of SDs above the mean for noise removal
            var    decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount);

            // DataTools.writeBarGraph(dBProfile.NoiseMode);

            // iv: Prepare noise reduced spectrogram
            decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds);

            //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background
            //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold);
            var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

            // prepare new sonogram config and draw second image going down different code pathway
            var config = new SonogramConfig
            {
                MinFreqBand             = 0,
                MaxFreqBand             = 10000,
                NoiseReductionType      = SNR.KeyToNoiseReductionType("Standard"),
                NoiseReductionParameter = 1.0,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,
            };

            //var mfccConfig = new MfccConfiguration(config);
            int  bandCount  = config.mfccConfig.FilterbankCount;
            bool doMelScale = config.mfccConfig.DoMelScale;
            int  ccCount    = config.mfccConfig.CcCount;
            int  fftBins    = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band
            int  minHz      = config.MinFreqBand ?? 0;
            int  maxHz      = config.MaxFreqBand ?? nyquistFreq;

            var standardSonogram = new SpectrogramStandard(config, recording.WavReader);
            var image4           = standardSonogram.GetImage();

            // TODO next line crashes - does not produce cepstral sonogram.
            //SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader);
            //Image image5 = cepSng.GetImage();

            //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict);
            //var image = mti.GetImage();

            //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config);
            //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict);

            //prepare sonogram images
            var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false));

            protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond));
            protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width));
            protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram));
            var image6 = protoImage6.GetImage();

            var list = new List <Image <Rgb24> >();

            list.Add(image1); // amplitude spectrogram
            list.Add(image2); // decibel spectrogram before noise removal
            list.Add(image3); // decibel spectrogram after noise removal
            list.Add(image4); // second version of noise reduced spectrogram

            //list.Add(image5); // ceptral sonogram
            list.Add(image6.CloneAs <Rgb24>()); // multitrack image

            Image finalImage = ImageTools.CombineImagesVertically(list);

            finalImage.Save(fiImage.FullName);

            ////2: NOISE REMOVAL
            //double[,] originalSg = sonogram.Data;
            //double[,] mnr        = sonogram.Data;
            //mnr = ImageTools.WienerFilter(mnr, 3);

            //double backgroundThreshold = 4.0;   //SETS MIN DECIBEL BOUND
            //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold);

            //double ConfigRange = 70;        //sets the the max dB
            //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange);

            ////3: Spectral tracks sonogram
            //byte[,] binary = MatrixTools.IdentifySpectralRidges(mnr);
            //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10);
            //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary);
            ////binary = MatrixTools.PickOutLines(binary); //syntactic approach

            //sonogram.SetBinarySpectrum(binary);
            ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg);

            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_tracks.png";
            //image.Save(fn);
            //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn);

            //3: prepare image of spectral peaks sonogram
            //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange);
            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_peaks.png";
            //image.Save(fn);

            //LoggedConsole.WriteLine("Spectral peaks  sonogram to file: " + fn);

            //4: Sobel approach
            //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange);
            //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
            //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
            //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
            //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
            //fn = outputFolder + wavFileName + "_sobel.png";
            //image.Save(fn);
            //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn);

            // I1.txt contains the sonogram matrix produced by matlab
            //string matlabFile = @"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt";
            //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166);

            //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]);
            //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]);

            // TODO put this back once sonogram issues resolved

            /*
             * LoggedConsole.WriteLine("START: AED");
             * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix);
             * LoggedConsole.WriteLine("END: AED");
             *
             *
             * //set up static variables for init Acoustic events
             * //AcousticEvent.   doMelScale = config.DoMelScale;
             * AcousticEvent.FreqBinCount = config.FreqBinCount;
             * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount;
             * //  int minF        = (int)config.MinFreqBand;
             * //  int maxF        = (int)config.MaxFreqBand;
             * AcousticEvent.FrameDuration = config.GetFrameOffset();
             *
             *
             * var events = new List<EventPatternRecog.Rectangle>();
             * foreach (Oblong o in oblongs)
             * {
             *  var e = new AcousticEvent(o);
             *  events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq));
             *  //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq);
             * }
             *
             * LoggedConsole.WriteLine("# AED events: " + events.Count);
             *
             * LoggedConsole.WriteLine("START: EPR");
             * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events);
             * LoggedConsole.WriteLine("END: EPR");
             *
             * var eprEvents = new List<AcousticEvent>();
             * foreach (EventPatternRecog.Rectangle r in eprRects)
             * {
             *  var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false);
             *  LoggedConsole.WriteLine(ae.WriteProperties());
             *  eprEvents.Add(ae);
             * }
             *
             * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png");
             *
             * bool doHighlightSubband = false; bool add1kHzLines = true;
             * var image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
             * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
             * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
             * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
             * image.AddEvents(eprEvents);
             * image.Save(outputFolder + wavFileName + ".png");
             */

            LoggedConsole.WriteLine("\nFINISHED!");
        }
        } //Analysis()

        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent> > DetectHarmonics(
            AudioRecording recording,
            double intensityThreshold,
            int minHz,
            int minFormantgap,
            int maxFormantgap,
            double minDuration,
            int windowSize,
            double windowOverlap,
            TimeSpan segmentStartOffset)
        {
            //i: MAKE SONOGRAM
            int    numberOfBins    = 32;
            double binWidth        = recording.SampleRate / (double)windowSize;
            int    sr              = recording.SampleRate;
            double frameDuration   = windowSize / (double)sr;             // Duration of full frame or window in seconds
            double frameOffset     = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
            double framesPerSecond = 1 / frameOffset;

            //double framesPerSecond = sr / (double)windowSize;
            //int frameOffset = (int)(windowSize * (1 - overlap));
            //int frameCount = (length - windowSize + frameOffset) / frameOffset;

            double epsilon  = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
                recording.WavReader.Samples,
                sr,
                epsilon,
                windowSize,
                windowOverlap);

            double[] avAbsolute = results2.Average; //average absolute value over the minute recording

            //double[] envelope = results2.Item2;
            double[,]
            matrix = results2
                     .AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            double windowPower = results2.WindowPower;

            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int minBin = (int)Math.Round(minHz / binWidth);
            int maxHz  = (int)Math.Round(minHz + (numberOfBins * binWidth));

            int rowCount = matrix.GetLength(0);
            int colCount = matrix.GetLength(1);
            int maxbin   = minBin + numberOfBins;

            double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
            var results      = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity   = results.Item1; //an array of periodicity scores
            double[] periodicity = results.Item2;

            //transfer periodicity info to a hits matrix.
            //intensity = DataTools.filterMovingAverage(intensity, 3);
            double[] scoreArray = new double[intensity.Length];
            var      hits       = new double[rowCount, colCount];

            for (int r = 0; r < rowCount; r++)
            {
                double relativePeriod = periodicity[r] / numberOfBins / 2;
                if (intensity[r] > intensityThreshold)
                {
                    for (int c = minBin; c < maxbin; c++)
                    {
                        hits[r, c] = relativePeriod;
                    }
                }

                double herzPeriod = periodicity[r] * binWidth;
                if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
                {
                    scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
                }
            }

            scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxDuration = 100000.0; //abitrary long number - do not want to restrict duration of machine noise
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                framesPerSecond,
                binWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            hits = null;

            //set up the songogram to return. Use the existing amplitude sonogram
            int                bitsPerSample = recording.WavReader.BitsPerSample;
            TimeSpan           duration      = recording.Duration;
            NoiseReductionType nrt           = SNR.KeyToNoiseReductionType("STANDARD");

            var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
                recording.BaseName,
                windowSize,
                windowOverlap,
                bitsPerSample,
                windowPower,
                sr,
                duration,
                nrt,
                matrix);

            sonogram.DecibelsNormalised = new double[rowCount];

            //foreach frame or time step
            for (int i = 0; i < rowCount; i++)
            {
                sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
            }

            sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
            return(Tuple.Create(sonogram, hits, scoreArray, predictedEvents));
        } //end Execute_HDDetect
示例#7
0
        public static void Execute(Arguments arguments)
        {
            const string Title = "# DETERMINING SIGNAL TO NOISE RATIO IN RECORDING";
            string       date  = "# DATE AND TIME: " + DateTime.Now;

            Log.WriteLine(Title);
            Log.WriteLine(date);
            Log.Verbosity = 1;

            var input                    = arguments.Source;
            var sourceFileName           = input.Name;
            var outputDir                = arguments.Output;
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(input.FullName);
            var outputTxtPath            = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".txt").ToFileInfo();

            Log.WriteIfVerbose("# Recording file: " + input.FullName);
            Log.WriteIfVerbose("# Config file:    " + arguments.Config);
            Log.WriteIfVerbose("# Output folder =" + outputDir.FullName);
            FileTools.WriteTextFile(outputTxtPath.FullName, date + "\n# Recording file: " + input.FullName);

            //READ PARAMETER VALUES FROM INI FILE
            // load YAML configuration
            Config configuration = ConfigFile.Deserialize(arguments.Config);

            //ii: SET SONOGRAM CONFIGURATION
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName        = input.FullName;
            sonoConfig.WindowSize         = configuration.GetIntOrNull(AnalysisKeys.KeyFrameSize) ?? 512;
            sonoConfig.WindowOverlap      = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.5;
            sonoConfig.WindowFunction     = configuration[AnalysisKeys.KeyWindowFunction];
            sonoConfig.NPointSmoothFFT    = configuration.GetIntOrNull(AnalysisKeys.KeyNPointSmoothFft) ?? 256;
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType(configuration[AnalysisKeys.NoiseReductionType]);

            int    minHz          = configuration.GetIntOrNull("MIN_HZ") ?? 0;
            int    maxHz          = configuration.GetIntOrNull("MAX_HZ") ?? 11050;
            double segK1          = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K1") ?? 0;
            double segK2          = configuration.GetDoubleOrNull("SEGMENTATION_THRESHOLD_K2") ?? 0;
            double latency        = configuration.GetDoubleOrNull("K1_K2_LATENCY") ?? 0;
            double vocalGap       = configuration.GetDoubleOrNull("VOCAL_GAP") ?? 0;
            double minVocalLength = configuration.GetDoubleOrNull("MIN_VOCAL_DURATION") ?? 0;

            //bool DRAW_SONOGRAMS = (bool?)configuration.DrawSonograms ?? true;    //options to draw sonogram

            //double intensityThreshold = Acoustics.AED.Default.intensityThreshold;
            //if (dict.ContainsKey(key_AED_INTENSITY_THRESHOLD)) intensityThreshold = Double.Parse(dict[key_AED_INTENSITY_THRESHOLD]);
            //int smallAreaThreshold = Acoustics.AED.Default.smallAreaThreshold;
            //if( dict.ContainsKey(key_AED_SMALL_AREA_THRESHOLD))   smallAreaThreshold = Int32.Parse(dict[key_AED_SMALL_AREA_THRESHOLD]);

            // COnvert input recording into wav
            var convertParameters = new AudioUtilityRequest {
                TargetSampleRate = 17640
            };
            var fileToAnalyse = new FileInfo(Path.Combine(outputDir.FullName, "temp.wav"));

            if (File.Exists(fileToAnalyse.FullName))
            {
                File.Delete(fileToAnalyse.FullName);
            }

            var convertedFileInfo = AudioFilePreparer.PrepareFile(
                input,
                fileToAnalyse,
                convertParameters,
                outputDir);

            // (A) ##########################################################################################################################
            AudioRecording recording              = new AudioRecording(fileToAnalyse.FullName);
            int            signalLength           = recording.WavReader.Samples.Length;
            TimeSpan       wavDuration            = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            double         frameDurationInSeconds = sonoConfig.WindowSize / (double)recording.SampleRate;
            TimeSpan       frameDuration          = TimeSpan.FromTicks((long)(frameDurationInSeconds * TimeSpan.TicksPerSecond));
            int            stepSize = (int)Math.Floor(sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap));
            double         stepDurationInSeconds = sonoConfig.WindowSize * (1 - sonoConfig.WindowOverlap)
                                                   / recording.SampleRate;
            TimeSpan stepDuration    = TimeSpan.FromTicks((long)(stepDurationInSeconds * TimeSpan.TicksPerSecond));
            double   framesPerSecond = 1 / stepDuration.TotalSeconds;
            int      frameCount      = signalLength / stepSize;

            // (B) ################################## EXTRACT ENVELOPE and SPECTROGRAM ##################################
            var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                sonoConfig.WindowSize,
                sonoConfig.WindowOverlap);

            //double[] avAbsolute = dspOutput.Average; //average absolute value over the minute recording

            // (C) ################################## GET SIGNAL WAVEFORM ##################################
            double[] signalEnvelope   = dspOutput.Envelope;
            double   avSignalEnvelope = signalEnvelope.Average();

            // (D) ################################## GET Amplitude Spectrogram ##################################
            double[,] amplitudeSpectrogram = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.

            // (E) ################################## Generate deciBel spectrogram from amplitude spectrogram
            double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);

            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(
                dspOutput.AmplitudeSpectrogram,
                dspOutput.WindowPower,
                recording.SampleRate,
                epsilon);

            LoggedConsole.WriteLine("# Finished calculating decibel spectrogram.");

            StringBuilder sb = new StringBuilder();

            sb.AppendLine("\nSIGNAL PARAMETERS");
            sb.AppendLine("Signal Duration     =" + wavDuration);
            sb.AppendLine("Sample Rate         =" + recording.SampleRate);
            sb.AppendLine("Min Signal Value    =" + dspOutput.MinSignalValue);
            sb.AppendLine("Max Signal Value    =" + dspOutput.MaxSignalValue);
            sb.AppendLine("Max Absolute Ampl   =" + signalEnvelope.Max().ToString("F3") + "  (See Note 1)");
            sb.AppendLine("Epsilon Ampl (1 bit)=" + epsilon);

            sb.AppendLine("\nFRAME PARAMETERS");
            sb.AppendLine("Window Size    =" + sonoConfig.WindowSize);
            sb.AppendLine("Frame Count    =" + frameCount);
            sb.AppendLine("Envelope length=" + signalEnvelope.Length);
            sb.AppendLine("Frame Duration =" + frameDuration.TotalMilliseconds.ToString("F3") + " ms");
            sb.AppendLine("Frame overlap  =" + sonoConfig.WindowOverlap);
            sb.AppendLine("Step Size      =" + stepSize);
            sb.AppendLine("Step duration  =" + stepDuration.TotalMilliseconds.ToString("F3") + " ms");
            sb.AppendLine("Frames Per Sec =" + framesPerSecond.ToString("F1"));

            sb.AppendLine("\nFREQUENCY PARAMETERS");
            sb.AppendLine("Nyquist Freq    =" + dspOutput.NyquistFreq + " Hz");
            sb.AppendLine("Freq Bin Width  =" + dspOutput.FreqBinWidth.ToString("F2") + " Hz");
            sb.AppendLine("Nyquist Bin     =" + dspOutput.NyquistBin);

            sb.AppendLine("\nENERGY PARAMETERS");
            double val = dspOutput.FrameEnergy.Min();

            sb.AppendLine(
                "Minimum dB / frame       =" + (10 * Math.Log10(val)).ToString("F2") + "  (See Notes 2, 3 & 4)");
            val = dspOutput.FrameEnergy.Max();
            sb.AppendLine("Maximum dB / frame       =" + (10 * Math.Log10(val)).ToString("F2"));

            sb.AppendLine("\ndB NOISE SUBTRACTION");
            double noiseRange = 2.0;

            //sb.AppendLine("Noise (estimate of mode) =" + sonogram.SnrData.NoiseSubtracted.ToString("F3") + " dB   (See Note 5)");
            //double noiseSpan = sonogram.SnrData.NoiseRange;
            //sb.AppendLine("Noise range              =" + noiseSpan.ToString("F2") + " to +" + (noiseSpan * -1).ToString("F2") + " dB   (See Note 6)");
            //sb.AppendLine("SNR (max frame-noise)    =" + sonogram.SnrData.Snr.ToString("F2") + " dB   (See Note 7)");

            //sb.Append("\nSEGMENTATION PARAMETERS");
            //sb.Append("Segment Thresholds K1: {0:f2}.  K2: {1:f2}  (See Note 8)", segK1, segK2);
            //sb.Append("# Event Count = " + predictedEvents.Count());

            FileTools.Append2TextFile(outputTxtPath.FullName, sb.ToString());
            FileTools.Append2TextFile(outputTxtPath.FullName, GetSNRNotes(noiseRange).ToString());

            // (F) ################################## DRAW IMAGE 1: original spectorgram
            Log.WriteLine("# Start drawing noise reduced sonograms.");
            TimeSpan X_AxisInterval = TimeSpan.FromSeconds(1);

            //int Y_AxisInterval = (int)Math.Round(1000 / dspOutput.FreqBinWidth);
            int nyquist    = recording.SampleRate / 2;
            int hzInterval = 1000;

            var image1 = DrawSonogram(deciBelSpectrogram, wavDuration, X_AxisInterval, stepDuration, nyquist, hzInterval);

            // (G) ################################## Calculate modal background noise spectrum in decibels
            //double SD_COUNT = -0.5; // number of SDs above the mean for noise removal
            //NoiseReductionType nrt = NoiseReductionType.MODAL;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, SD_COUNT);

            //double upperPercentileBound = 0.2;    // lowest percentile for noise removal
            //NoiseReductionType nrt = NoiseReductionType.LOWEST_PERCENTILE;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound);

            // (H) ################################## Calculate BRIGGS noise removal from amplitude spectrum
            int percentileBound = 20; // low energy percentile for noise removal

            //double binaryThreshold   = 0.6;   //works for higher SNR recordings
            double binaryThreshold = 0.4; //works for lower SNR recordings

            //double binaryThreshold = 0.3;   //works for lower SNR recordings
            double[,] m = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetMask(
                amplitudeSpectrogram,
                percentileBound,
                binaryThreshold);

            string title  = "TITLE";
            var    image2 = NoiseRemoval_Briggs.DrawSonogram(
                m,
                wavDuration,
                X_AxisInterval,
                stepDuration,
                nyquist, hzInterval,
                title);

            //Image image2 = NoiseRemoval_Briggs.BriggsNoiseFilterAndGetSonograms(amplitudeSpectrogram, upperPercentileBound, binaryThreshold,
            //                                                                          wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval);

            // (I) ################################## Calculate MEDIAN noise removal from amplitude spectrum

            //double upperPercentileBound = 0.8;    // lowest percentile for noise removal
            //NoiseReductionType nrt = NoiseReductionType.MEDIAN;
            //System.Tuple<double[,], double[]> tuple = SNR.NoiseReduce(deciBelSpectrogram, nrt, upperPercentileBound);

            //double[,] noiseReducedSpectrogram1 = tuple.Item1;  //
            //double[] noiseProfile              = tuple.Item2;  // smoothed modal profile

            //SNR.NoiseProfile dBProfile = SNR.CalculateNoiseProfile(deciBelSpectrogram, SD_COUNT);       // calculate noise value for each freq bin.
            //double[] noiseProfile = DataTools.filterMovingAverage(dBProfile.noiseThresholds, 7);        // smooth modal profile
            //double[,] noiseReducedSpectrogram1 = SNR.TruncateBgNoiseFromSpectrogram(deciBelSpectrogram, dBProfile.noiseThresholds);
            //Image image2 = DrawSonogram(noiseReducedSpectrogram1, wavDuration, X_AxisInterval, stepDuration, Y_AxisInterval);

            var combinedImage = ImageTools.CombineImagesVertically(image1, image2);

            string imagePath = Path.Combine(outputDir.FullName, fileNameWithoutExtension + ".png");

            combinedImage.Save(imagePath);

            // (G) ################################## Calculate modal background noise spectrum in decibels

            Log.WriteLine("# Finished recording:- " + input.Name);
        }
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(
            AudioRecording recording,
            Config configuration,
            TimeSpan segmentStartOffset,
            Lazy <IndexCalculateResult[]> getSpectralIndexes,
            DirectoryInfo outputDirectory,
            int?imageWidth)
        {
            //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // this default framesize seems to work for Lewin's Rail
            const int frameSize = 512;

            // DO NOT SET windowOverlap. Calculate it below.

            if (imageWidth == null)
            {
                throw new ArgumentNullException(nameof(imageWidth));
            }

            // check the sample rate. Must be 22050
            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            TimeSpan recordingDuration = recording.WavReader.Time;

            // check for the profiles in the config file
            bool hasProfiles = ConfigFile.HasProfiles(configuration);

            if (!hasProfiles)
            {
                throw new ConfigFileException("The Config file for L.pectoralis must contain a profiles object.");
            }

            // get the profile names
            string[]     profileNames     = ConfigFile.GetProfileNames(configuration);
            var          recognizerConfig = new LewinsRailConfig();
            var          prunedEvents     = new List <AcousticEvent>();
            var          plots            = new List <Plot>();
            BaseSonogram sonogram         = null;

            // cycle through the profiles and analyse recording using each of them
            foreach (var name in profileNames)
            {
                Log.Debug($"Reading profile <{name}>.");
                recognizerConfig.ReadConfigFile(configuration, name);

                // ignore oscillations above this threshold freq
                int maxOscilRate = (int)Math.Ceiling(1 / recognizerConfig.MinPeriod);

                // calculate frame overlap and ignor any user inut.
                double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
                    recording.SampleRate,
                    frameSize,
                    maxOscilRate);

                // i: MAKE SONOGRAM
                var sonoConfig = new SonogramConfig
                {
                    SourceFName = recording.BaseName,

                    //set default values - ignore those set by user
                    WindowSize    = frameSize,
                    WindowOverlap = windowOverlap,

                    // the default window is HAMMING
                    //WindowFunction = WindowFunctions.HANNING.ToString(),
                    //WindowFunction = WindowFunctions.NONE.ToString(),
                    // if do not use noise reduction can get a more sensitive recogniser.
                    //NoiseReductionType = NoiseReductionType.NONE,
                    NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
                };

                //#############################################################################################################################################
                //DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
                var results = Analysis(recording, sonoConfig, recognizerConfig, this.ReturnDebugImage, segmentStartOffset);

                // ######################################################################

                if (results == null)
                {
                    return(null); //nothing to process
                }

                sonogram = results.Item1;

                //var hits = results.Item2;
                var scoreArray      = results.Item3;
                var predictedEvents = results.Item4;
                var debugImage      = results.Item5;

                //#############################################################################################################################################

                if (debugImage == null)
                {
                    Log.Debug("DebugImage is null, not writing file");
                }
                else if (MainEntry.InDEBUG)
                {
                    var imageName = AnalysisResultName(recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                    var debugPath = outputDirectory.Combine(imageName);

                    //debugImage.Save(debugPath.FullName);
                }

                foreach (var ae in predictedEvents)
                {
                    // add additional info
                    if (!(ae.Score > recognizerConfig.EventThreshold))
                    {
                        continue;
                    }

                    ae.Name                   = recognizerConfig.AbbreviatedSpeciesName;
                    ae.SpeciesName            = recognizerConfig.SpeciesName;
                    ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                    ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                    prunedEvents.Add(ae);
                }

                // increase very low scores
                for (int j = 0; j < scoreArray.Length; j++)
                {
                    scoreArray[j] *= 4;
                    if (scoreArray[j] > 1.0)
                    {
                        scoreArray[j] = 1.0;
                    }
                }

                var plot = new Plot(this.DisplayName, scoreArray, recognizerConfig.EventThreshold);
                plots.Add(plot);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = null,
                Plots = plots,
                Events = prunedEvents,
            });
        }
        /// <summary>
        /// Does the Analysis
        /// Returns a DataTable
        /// </summary>
        /// <param name="fiSegmentOfSourceFile"></param>
        /// <param name="configDict"></param>
        /// <param name="diOutputDir"></param>
        /// <param name="opFileName"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="config"></param>
        /// <param name="segmentAudioFile"></param>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, DirectoryInfo diOutputDir, string opFileName, TimeSpan segmentStartOffset)
        {
            //set default values
            int    bandWidth          = 500; //detect bars in bands of this width.
            int    frameSize          = 1024;
            double windowOverlap      = 0.0;
            double intensityThreshold = double.Parse(configDict[key_INTENSITY_THRESHOLD]);
            //intensityThreshold = 0.01;

            AudioRecording recording = AudioRecording.GetAudioRecording(fiSegmentOfSourceFile, RESAMPLE_RATE, diOutputDir.FullName, opFileName);

            if (recording == null)
            {
                LoggedConsole.WriteLine("############ WARNING: Recording could not be obtained - likely file does not exist.");
                return(null);
            }
            int      sr                   = recording.SampleRate;
            double   binWidth             = recording.SampleRate / (double)frameSize;
            double   frameDuration        = frameSize / (double)sr;
            double   frameOffset          = frameDuration * (1 - windowOverlap); //seconds between start of each frame
            double   framesPerSecond      = 1 / frameOffset;
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      colStep              = (int)Math.Round(bandWidth / binWidth);

            //i: GET SONOGRAM AS MATRIX
            double epsilon  = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, sr, epsilon, frameSize, windowOverlap);

            double[] avAbsolute = results2.Average;                //average absolute value over the minute recording
            //double[] envelope = results2.Item2;
            double[,] spectrogram = results2.AmplitudeSpectrogram; //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            double windowPower = results2.WindowPower;

            //############################ NEXT LINE FOR DEBUGGING ONLY
            //spectrogram = GetTestSpectrogram(spectrogram.GetLength(0), spectrogram.GetLength(1), 0.01, 0.03);

            var output         = DetectGratingEvents(spectrogram, colStep, intensityThreshold);
            var amplitudeArray = output.Item2; //for debug purposes only

            //convert List of Dictionary events to List of ACousticevents.
            //also set up the hits matrix.
            int rowCount       = spectrogram.GetLength(0);
            int colCount       = spectrogram.GetLength(1);
            var hitsMatrix     = new double[rowCount, colCount];
            var acousticEvents = new List <AcousticEvent>();

            double minFrameCount = 8; //this assumes that the minimum grid is 2 * 4 = 8 long

            foreach (Dictionary <string, double> item in output.Item1)
            {
                int minRow     = (int)item[key_START_FRAME];
                int maxRow     = (int)item[key_END_FRAME];
                int frameCount = maxRow - minRow + 1;
                if (frameCount < minFrameCount)
                {
                    continue;                             //only want events that are over a minimum length
                }
                int    minCol      = (int)item[key_MIN_FREQBIN];
                int    maxCol      = (int)item[key_MAX_FREQBIN];
                double periodicity = item[key_PERIODICITY];

                double[] subarray = DataTools.Subarray(avAbsolute, minRow, maxRow - minRow + 1);
                double   severity = 0.1;
                int[]    bounds   = DataTools.Peaks_CropToFirstAndLast(subarray, severity);
                minRow = minRow + bounds[0];
                maxRow = minRow + bounds[1];
                if (maxRow >= rowCount)
                {
                    maxRow = rowCount - 1;
                }

                Oblong o  = new Oblong(minRow, minCol, maxRow, maxCol);
                var    ae = new AcousticEvent(segmentStartOffset, o, results2.NyquistFreq, frameSize, frameDuration, frameOffset, frameCount);
                ae.Name            = string.Format("p={0:f0}", periodicity);
                ae.Score           = item[key_SCORE];
                ae.ScoreNormalised = item[key_SCORE] / 0.5;
                acousticEvents.Add(ae);

                //display event on the hits matrix
                for (int r = minRow; r < maxRow; r++)
                {
                    for (int c = minCol; c < maxCol; c++)
                    {
                        hitsMatrix[r, c] = periodicity;
                    }
                }
            } //foreach

            //set up the songogram to return. Use the existing amplitude sonogram
            int bitsPerSample = recording.WavReader.BitsPerSample;
            //NoiseReductionType nrt = SNR.Key2NoiseReductionType("NONE");
            NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");
            var sonogram           = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(recording.BaseName, frameSize, windowOverlap, bitsPerSample, windowPower, sr, tsRecordingtDuration, nrt, spectrogram);

            sonogram.DecibelsNormalised = new double[sonogram.FrameCount];
            for (int i = 0; i < sonogram.FrameCount; i++) //foreach frame or time step
            {
                sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
            }
            sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);

            return(Tuple.Create(sonogram, hitsMatrix, amplitudeArray, acousticEvents, tsRecordingtDuration));
        } //Analysis()
示例#10
0
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaBicolorConfig lbConfig,
            bool drawDebugImage,
            TimeSpan segmentStartOffset)
        {
            double decibelThreshold   = lbConfig.DecibelThreshold; //dB
            double intensityThreshold = lbConfig.IntensityThreshold;

            //double eventThreshold = lbConfig.EventThreshold; //in 0-1

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 3 * lbConfig.MaxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

            // Could smooth here rather than above. Above seemed slightly better?
            amplitudeScores  = DataTools.filterMovingAverage(amplitudeScores, 7);
            differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lbConfig.LowerBandMinHz,
                lbConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                lbConfig.MinDuration,
                lbConfig.MaxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // init the score array
            double[] scores = new double[rowCount];

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedEvents)
            {
                //rowtop,  rowWidth
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);

                    bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    // lay down score for sample length
                    for (int j = 0; j < dctLength; j++)
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = "L.b";
                    ae.Score_MaxInEvent = maximumIntensity;
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################

            // calculate the cosine similarity scores
            var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            Image debugImage = null;

            if (drawDebugImage)
            {
                // display a variety of debug score arrays

                //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
                DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };

                // other debug plots
                //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
                debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
//Analyze()

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// Returns a DataTable
        /// </summary>
        /// <param name="fiSegmentOfSourceFile"></param>
        /// <param name="analysisSettings"></param>
        /// <param name="originalSampleRate"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="configDict"></param>
        /// <param name="diOutputDir"></param>
        public static Tuple <BaseSonogram, double[, ], List <Plot>, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, AnalysisSettings analysisSettings, int originalSampleRate, TimeSpan segmentStartOffset)
        {
            Dictionary <string, string> configDict = analysisSettings.ConfigDict;
            int originalAudioNyquist = originalSampleRate / 2; // original sample rate can be anything 11.0-44.1 kHz.

            //set default values - ignore those set by user
            int    frameSize          = 32;
            double windowOverlap      = 0.3;
            int    xCorrelationLength = 256; //for Xcorrelation   - 256 frames @801 = 320ms, almost 1/3 second.
            //int xCorrelationLength = 128;   //for Xcorrelation   - 128 frames @801 = 160ms, almost 1/6 second.
            //int xCorrelationLength = 64;   //for Xcorrelation   - 64 frames @128 = 232ms, almost 1/4 second.
            //int xCorrelationLength = 16;   //for Xcorrelation   - 16 frames @128 = 232ms, almost 1/4 second.
            double dBThreshold = 12.0;

            // read frog data to datatable
            var dt = CsvTools.ReadCSVToTable(configDict[key_FROG_DATA], true);                     // read file contining parameters of frog calls to a table

            double intensityThreshold = double.Parse(configDict[AnalysisKeys.IntensityThreshold]); //in 0-1
            double minDuration        = double.Parse(configDict[AnalysisKeys.MinDuration]);        // seconds
            double maxDuration        = double.Parse(configDict[AnalysisKeys.MaxDuration]);        // seconds
            double minPeriod          = double.Parse(configDict[AnalysisKeys.MinPeriodicity]);     // seconds
            double maxPeriod          = double.Parse(configDict[AnalysisKeys.MaxPeriodicity]);     // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName   = recording.BaseName;
            sonoConfig.WindowSize    = frameSize;
            sonoConfig.WindowOverlap = windowOverlap;
            //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD");   //must do noise removal
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   frameOffset     = sonoConfig.GetFrameOffset(sr);
            double   framesPerSecond = 1 / frameOffset;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            //iii: GET TRACKS
            int nhLimit      = 3; //limit of neighbourhood around maximum
            var peaks        = DataTools.GetPeakValues(sonogram.DecibelsPerFrame);
            var tuple        = SpectralTrack.GetSpectralMaxima(sonogram.DecibelsPerFrame, sonogram.Data, dBThreshold, nhLimit);
            var maxFreqArray = tuple.Item1; //array (one element per frame) indicating which freq bin has max amplitude.
            var hitsMatrix   = tuple.Item2;
            int herzOffset   = 0;
            int maxFreq      = 6000;
            var tracks       = SpectralTrack.GetSpectralTracks(maxFreqArray, framesPerSecond, freqBinWidth, herzOffset, SpectralTrack.MIN_TRACK_DURATION, SpectralTrack.MAX_INTRASYLLABLE_GAP, maxFreq);

            double severity     = 0.5;
            double dynamicRange = 60; // deciBels above background noise. BG noise has already been removed from each bin.
            // convert sonogram to a list of frequency bin arrays
            var listOfFrequencyBins = SpectrogramTools.Sonogram2ListOfFreqBinArrays(sonogram, dynamicRange);
            int minFrameLength      = SpectralTrack.FrameCountEquivalent(SpectralTrack.MIN_TRACK_DURATION, framesPerSecond);

            for (int i = tracks.Count - 1; i >= 0; i--)
            {
                tracks[i].CropTrack(listOfFrequencyBins, severity);
                if (tracks[i].Length < minFrameLength)
                {
                    tracks.Remove(tracks[i]);
                }
            } // foreach track

            foreach (SpectralTrack track in tracks) // find any periodicity in the track and calculate its score.
            {
                SpectralTrack.DetectTrackPeriodicity(track, xCorrelationLength, listOfFrequencyBins, sonogram.FramesPerSecond);
            } // foreach track

            int rowCount       = sonogram.Data.GetLength(0);
            int MAX_FREQ_BOUND = 6000;
            int topBin         = (int)Math.Round(MAX_FREQ_BOUND / freqBinWidth);
            var plots          = CreateScorePlots(tracks, rowCount, topBin);

            //iv: CONVERT TRACKS TO ACOUSTIC EVENTS
            List <AcousticEvent> frogEvents = SpectralTrack.ConvertTracks2Events(tracks, segmentStartOffset);

            // v: GET FROG IDs
            //var frogEvents = new List<AcousticEvent>();
            foreach (AcousticEvent ae in frogEvents)
            {
                double oscRate = 1 / ae.Periodicity;
                // ae.DominantFreq
                // ae.Score
                // ae.Duration
                //ClassifyFrogEvent(ae);
                string[] names = ClassifyFrogEvent(ae.DominantFreq, oscRate, dt);
                ae.Name  = names[0];
                ae.Name2 = names[1];
            }

            return(Tuple.Create(sonogram, hitsMatrix, plots, frogEvents, tsRecordingtDuration));
        } //Analysis()
        } //Analyze()

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// Returns a DataTable
        /// </summary>
        /// <param name="fiSegmentOfSourceFile"></param>
        /// <param name="configDict"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="diOutputDir"></param>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - ignor those set by user
            int    frameSize     = 1024;
            double windowOverlap = 0.0;

            int    upperBandMinHz     = int.Parse(configDict[KeyUpperfreqbandBtm]);
            int    upperBandMaxHz     = int.Parse(configDict[KeyUpperfreqbandTop]);
            int    lowerBandMinHz     = int.Parse(configDict[KeyLowerfreqbandBtm]);
            int    lowerBandMaxHz     = int.Parse(configDict[KeyLowerfreqbandTop]);
            double decibelThreshold   = double.Parse(configDict[KeyDecibelThreshold]);;  //dB
            double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold]); //in 0-1
            double minDuration        = double.Parse(configDict[KeyMinDuration]);        // seconds
            double maxDuration        = double.Parse(configDict[KeyMaxDuration]);        // seconds
            double minPeriod          = double.Parse(configDict[KeyMinPeriod]);          // seconds
            double maxPeriod          = double.Parse(configDict[KeyMaxPeriod]);          // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName   = recording.BaseName;
            sonoConfig.WindowSize    = frameSize;
            sonoConfig.WindowOverlap = windowOverlap;
            //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD");
            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = freqBinWidth;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, (rowCount - 1), lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, (rowCount - 1), upperBandMaxBin);

            int step         = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount    = rowCount / step;
            int sampleLength = 64;                               //64 frames = 3.7 seconds. Suitable for Lewins Rail.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if ((lowerSubarray.Length != sampleLength) || (upperSubarray.Length != sampleLength))
                {
                    break;
                }
                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 3;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;                                  //in real data these bins are dominant and hide other frequency content
                }
                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if ((period < minPeriod) || (period > maxPeriod))
                {
                    continue;
                }
                for (int j = 0; j < sampleLength; j++) //lay down score for sample length
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }
                    periodicity[start + j] = period;
                }
            }
            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray);
            var hits = new double[rowCount, colCount];

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, tsRecordingtDuration));
        } //Analysis()
示例#13
0
        /// <summary>
        /// THIS IS THE CORE DETECTION METHOD
        /// Detects the human voice
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values
            int frameLength = 1024;

            if (configDict.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
            }

            double windowOverlap = 0.0;

            int    minHz              = int.Parse(configDict["MIN_HZ"]);
            int    minFormantgap      = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int    maxFormantgap      = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double minDuration        = double.Parse(configDict["MIN_DURATION"]);        // seconds
            double maxDuration        = double.Parse(configDict["MAX_DURATION"]);        // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                //default values config
                SourceFName        = recording.BaseName,
                WindowSize         = frameLength,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            var    tsRecordingtDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int numberOfBins = 64;
            int minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin       = minBin + numberOfBins - 1;
            int maxHz        = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
            var results      = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity   = results.Item1;
            double[] periodicity = results.Item2; //an array of periodicity scores

            //intensity = DataTools.filterMovingAverage(intensity, 3);
            //expect humans to have max power >100 and < 1000 Hz. Set these bounds
            int lowerHumanMaxBound = (int)(100 / freqBinWidth);  //ignore 0-100 hz - too much noise
            int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 2500 hz

            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                if (intensity[r] < intensityThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find freq having max power and use info to adjust score.
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < lowerHumanMaxBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
                {
                    spectrum[j] = 0.0;
                }

                double[] peakvalues = DataTools.GetPeakValues(spectrum);
                int      maxIndex1  = DataTools.GetMaxIndex(peakvalues);
                peakvalues[maxIndex1] = 0.0;
                int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
                int avMaxBin  = (maxIndex1 + maxIndex2) / 2;

                //int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
                int    freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);
                double discount         = 1.0;
                if (freqWithMaxPower > 1000)
                {
                    discount = 0.0;
                }
                else
                if (freqWithMaxPower < 500)
                {
                    discount = 0.0;
                }

                //set scoreArray[r]  - ignore locations with low intensity
                if (intensity[r] > intensityThreshold)
                {
                    scoreArray[r] = intensity[r] * discount;
                }
            }

            //transfer info to a hits matrix.
            var    hits      = new double[rowCount, colCount];
            double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                double herzPeriod = periodicity[r] * freqBinWidth;
                for (int c = minBin; c < maxbin; c++)
                {
                    //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
                    hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap;  //to get a relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            //remove isolated speech events - expect humans to talk like politicians
            //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
            Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
        } //Analysis()