Example #1
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            var recognizerConfig = new LitoriaNasutaConfig();

            recognizerConfig.ReadConfigFile(configuration);

            // BETTER TO SET THESE. IGNORE USER!
            // this default framesize seems to work
            const int    frameSize     = 1024;
            const double windowOverlap = 0.0;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),

                // if noise reduction is not used, the recogniser can be more sensitive.
                //NoiseReductionType = NoiseReductionType.None
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr               = recording.SampleRate;
            double   freqBinWidth     = sr / (double)sonoConfig.WindowSize;
            int      minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int      maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var      decibelThreshold = 3.0;
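            // Illustrative arithmetic only (actual values come from the config):
            // with sr = 22050 and frameSize = 1024, freqBinWidth = 22050 / 1024 ≈ 21.5 Hz per bin,
            // so e.g. MinHz = 1500 would map to bin round(1500 / 21.5) + 1 = 71.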

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            //double[] topBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, maxBin + 3, (rowCount - 1), maxBin + 9);
            //double[] botBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin - 3, (rowCount - 1), minBin - 9);

            // iii: CONVERT SCORES TO ACOUSTIC EVENTS
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            double[,] hits = null;
            var prunedEvents = new List <AcousticEvent>();

            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = recognizerConfig.SpeciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
            });

            var thresholdedPlot = new double[amplitudeArray.Length];

            for (int x = 0; x < amplitudeArray.Length; x++)
            {
                if (amplitudeArray[x] > decibelThreshold)
                {
                    thresholdedPlot[x] = amplitudeArray[x];
                }
            }

            var maxDb = amplitudeArray.MaxOrDefault();

            double[] normalisedScores;
            double   normalisedThreshold;

            DataTools.Normalise(thresholdedPlot, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var text = $"{this.DisplayName} (Fullscale={maxDb:f1}dB)";
            var plot = new Plot(text, normalisedScores, normalisedThreshold);

            // debug toggle: always draw the debug spectrogram for this recognizer
            bool displayDebugImage = true;
            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    plot, amplPlot
                };

                // NOTE: This DrawDebugImage() method can be overridden in this class.
                var debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, hits);
                var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                debugImage.Save(debugPath);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = acousticEvents,
            });
        }
        ///  <summary>
        ///  ################ THE KEY ANALYSIS METHOD for TRILLS
        ///
        ///  See Anthony's ExempliGratia.Recognize() method for an example of how to use config profiles.
        ///  </summary>
        /// <param name="recording"></param>
        /// <param name="sonoConfig"></param>
        /// <param name="lwConfig"></param>
        /// <param name="returnDebugImage"></param>
        /// <param name="segmentStartOffset"></param>
        /// <returns></returns>
        private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaWatjulumConfig lwConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            double intensityThreshold = lwConfig.IntensityThreshold;
            double minDuration        = lwConfig.MinDurationOfTrill; // seconds
            double maxDuration        = lwConfig.MaxDurationOfTrill; // seconds
            double minPeriod          = lwConfig.MinPeriod;          // seconds
            double maxPeriod          = lwConfig.MaxPeriod;          // seconds

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth;
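            // note: with zero window overlap, frames per second = sr / windowSize, which is numerically the same as the frequency bin width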

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 4 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);
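            // illustrative values only: if maxPeriod = 0.25 s and framesPerSecond ≈ 86, then dctDuration = 1.0 s and dctLength ≈ 86 frames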

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            //int colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

            // Could smooth here rather than above. Above seemed slightly better?
            //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
            var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                lwConfig.DecibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // LOOK FOR TRILL EVENTS
            // init the score array
            double[] scores = new double[rowCount];

            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedTrillEvents)
            {
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    double   oscilFreq;
                    double   period;
                    double   intensity;
                    Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

                    bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
                    ae.Score_MaxInEvent = maximumIntensity;
                    ae.Profile          = lwConfig.ProfileNames[0];
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // LOOK FOR TINK EVENTS
            // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            double minDurationOfTink = lwConfig.MinDurationOfTink;  // seconds
            double maxDurationOfTink = lwConfig.MaxDurationOfTink;  // seconds

            // use a stronger threshold for the tink because it is brief.
            double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
            var    predictedTinkEvents  = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                tinkDecibelThreshold,
                minDurationOfTink,
                maxDurationOfTink,
                segmentStartOffset);

            foreach (var ae2 in predictedTinkEvents)
            {
                // Prune the list of potential acoustic events, for example using Cosine Similarity.

                //rowtop,  rowWidth
                //int eventStart = ae2.Oblong.RowTop;
                //int eventWidth = ae2.Oblong.RowWidth;
                //int step = 2;
                //double maximumIntensity = 0.0;

                // add abbreviatedSpeciesName into event
                //if (maximumIntensity >= intensityThreshold)
                //{
                ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

                //ae2.Score_MaxInEvent = maximumIntensity;
                ae2.Profile = lwConfig.ProfileNames[1];
                confirmedEvents.Add(ae2);

                //}
            }

            //######################################################################

            var   scorePlot  = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;
                DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };
                debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return a new sonogram because it makes the image easier to interpret
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if noise reduction is not used, the recogniser can be more sensitive.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
        internal RecognizerResults Gruntwork(AudioRecording audioRecording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            double noiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.1;

            // make a spectrogram
            var config = new SonogramConfig
            {
                WindowSize              = 256,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            config.WindowOverlap = 0.0;

            // now construct the standard decibel spectrogram WITH noise removal, and look for the target call
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

            // remove the DC column
            var spg        = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
            int sampleRate = audioRecording.SampleRate;
            int rowCount   = spg.GetLength(0);
            int colCount   = spg.GetLength(1);

            int    frameSize          = config.WindowSize;
            int    frameStep          = frameSize; // this default = zero overlap
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec       = 1 / frameStepInSeconds;
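            // illustrative check, assuming sr = 22050 (see the note below): frameStepInSeconds = 256 / 22050 ≈ 0.0116 s, so framesPerSec ≈ 86.1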

            // reading in variables from the config file
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
            int    minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int    maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // ## THREE THRESHOLDS ---- only one of these is given to user.
            // minimum dB to register a dominant freq peak. After noise removal
            double peakThresholdDb = 3.0;

            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventThresholdDb = 6;

            // minimum score for an acceptable event - that is when processing the score array.
            double similarityThreshold = configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold) ?? 0.2;

            // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 256.
            int    minFrameWidth = 7;
            int    maxFrameWidth = 14;
            double minDuration   = (minFrameWidth - 1) * frameStepInSeconds;
            double maxDuration   = maxFrameWidth * frameStepInSeconds;
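            // under that assumption: minDuration = (7 - 1) × 0.0116 ≈ 0.070 s and maxDuration = 14 × 0.0116 ≈ 0.163 s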

            // get the bin bounds of the target frequency band
            int binMin = (int)Math.Round(minHz / sonogram.FBinWidth);
            int binMax = (int)Math.Round(maxHz / sonogram.FBinWidth);

            int[]    dominantBins = new int[rowCount];    // predefinition of events max frequency
            double[] scores       = new double[rowCount]; // predefinition of score array
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram - NB: spg is rotated to vertical.
            // mark the hits in hitMatrix
            for (int s = 0; s < rowCount; s++)
            {
                double[] spectrum     = MatrixTools.GetRow(spg, s);
                double   maxAmplitude = double.MinValue;
                int      maxId        = 0;

                // loop through the frequency band of the call and look for the dominant frequency bin
                for (int binID = 5; binID < binMax; binID++)
                {
                    if (spectrum[binID] > maxAmplitude)
                    {
                        maxAmplitude = spectrum[binID];
                        maxId        = binID;
                    }
                }

                if (maxId < binMin)
                {
                    continue;
                }

                // peak should exceed threshold amplitude
                if (spectrum[maxId] < peakThresholdDb)
                {
                    continue;
                }

                scores[s]       = maxAmplitude;
                dominantBins[s] = maxId;

                // Console.WriteLine("Col {0}, Bin {1}  ", c, freqBinID);
            } // loop through all spectra

            // Find average amplitude

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(
                sonogram.Data,
                0,
                binMin,
                rowCount - 1,
                binMax);

            // subtract the local baseline (acts as a high-pass filter)
            var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

            // We now have a list of potential hits for C. tinnula. This needs to be filtered.
            Plot.FindStartsAndEndsOfScoreEvents(highPassFilteredSignal, eventThresholdDb, minFrameWidth, maxFrameWidth, out var prunedScores, out List<Point> startEnds);

            // loop through the score array and find the beginning and end of potential events
            var potentialEvents = new List<AcousticEvent>();

            foreach (Point point in startEnds)
            {
                // get average of the dominant bin
                int binSum     = 0;
                int binCount   = 0;
                int eventWidth = point.Y - point.X + 1;
                for (int s = point.X; s <= point.Y; s++)
                {
                    if (dominantBins[s] >= binMin)
                    {
                        binSum += dominantBins[s];
                        binCount++;
                    }
                }

                // find average dominant bin and corresponding frequency for the event
                int avDominantBin  = (int)Math.Round(binSum / (double)binCount);
                int avDominantFreq = (int)(avDominantBin * sonogram.FBinWidth);

                // Get score for the event.
                // Use a simple template for the honk and calculate cosine similarity to the template.
                // The template has three dominant frequencies.
                // callBinWidth = minimum number of bins covering the frequency bandwidth of the C. tinnula call
                int    callBinWidth = 14;
                var    templates    = GetCtinnulaTemplates(callBinWidth);
                var    eventMatrix  = MatrixTools.Submatrix(spg, point.X, avDominantBin - callBinWidth + 2, point.Y, avDominantBin + 1);
                double eventScore   = GetEventScore(eventMatrix, templates);

                // put hits into hits matrix
                // put cosine score into the score array
                for (int s = point.X; s <= point.Y; s++)
                {
                    hits[s, avDominantBin] = 10;
                    prunedScores[s]        = eventScore;
                }

                if (eventScore < similarityThreshold)
                {
                    continue;
                }

                int topBinForEvent    = avDominantBin + 2;
                int bottomBinForEvent = topBinForEvent - callBinWidth;

                double startTime    = point.X * frameStepInSeconds;
                double durationTime = eventWidth * frameStepInSeconds;
                var    newEvent     = new AcousticEvent(segmentStartOffset, startTime, durationTime, minHz, maxHz);
                newEvent.DominantFreq = avDominantFreq;
                newEvent.Score        = eventScore;
                newEvent.SetTimeAndFreqScales(framesPerSec, sonogram.FBinWidth);
                newEvent.Name = string.Empty; // remove name because it hides spectral content of the event.

                potentialEvents.Add(newEvent);
            }

            // display the original score array
            scores = DataTools.normalise(scores);
            var debugPlot = new Plot(this.DisplayName, scores, similarityThreshold);

            // DEBUG IMAGE for this recognizer only. Drawn only when running in DEBUG mode.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeArray, eventThresholdDb, out var normalisedScores, out var normalisedThreshold);
                var ampltdPlot = new Plot("Average amplitude", normalisedScores, normalisedThreshold);

                DataTools.Normalise(highPassFilteredSignal, eventThresholdDb, out normalisedScores, out normalisedThreshold);
                var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

                /*
                 * DataTools.Normalise(scores, eventThresholdDb, out normalisedScores, out normalisedThreshold);
                 * var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
                 *
                 *
                 * DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                 * var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);
                 */
                var debugPlots = new List <Plot> {
                    ampltdPlot, demeanedPlot
                };
                Image debugImage = DisplayDebugImage(sonogram, potentialEvents, debugPlots, null);
                var   debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(audioRecording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // display the cosine similarity scores
            var plot  = new Plot(this.DisplayName, prunedScores, similarityThreshold);
            var plots = new List <Plot> {
                plot
            };

            // add names into the returned events
            foreach (AcousticEvent ae in potentialEvents)
            {
                ae.Name = abbreviatedSpeciesName;
            }

            return(new RecognizerResults()
            {
                Events = potentialEvents,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            });
        }
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LewinsRailConfig lrConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            int sr = recording.SampleRate;

            int upperBandMinHz = lrConfig.UpperBandMinHz;
            int upperBandMaxHz = lrConfig.UpperBandMaxHz;
            int lowerBandMinHz = lrConfig.LowerBandMinHz;
            int lowerBandMaxHz = lrConfig.LowerBandMaxHz;

            //double decibelThreshold = lrConfig.DecibelThreshold;   //dB
            //int windowSize = lrConfig.WindowSize;
            double eventThreshold = lrConfig.EventThreshold; //in 0-1
            double minDuration    = lrConfig.MinDuration;    // seconds
            double maxDuration    = lrConfig.MaxDuration;    // seconds
            double minPeriod      = lrConfig.MinPeriod;      // seconds
            double maxPeriod      = lrConfig.MaxPeriod;      // seconds

            //double freqBinWidth = sr / (double)windowSize;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            //i: MAKE SONOGRAM
            double framesPerSecond = freqBinWidth;

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            int step         = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount    = rowCount / step;
            int sampleLength = 64;                               //64 frames = 3.7 seconds. Suitable for Lewins Rail.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
                {
                    break;
                }

                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 3;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
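                // illustrative check using the assumptions above (sr = 17640, window = 1024, so framesPerSecond ≈ 17.2):
                // a peak at maxId = 8 gives period = 2 × 64 / 8 / 17.2 ≈ 0.93 s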
                if (period < minPeriod || period > maxPeriod)
                {
                    continue;
                }

                // lay down score for sample length
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }

                    periodicity[start + j] = period;
                }
            }

            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);

            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray, segmentStartOffset);
            var hits = new double[rowCount, colCount];

            //######################################################################

            var   scorePlot  = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var intensityPlot = new Plot("Intensity", normalisedScores, normalisedThreshold);
                DataTools.Normalise(periodicity, 10, out normalisedScores, out normalisedThreshold);
                var periodicityPlot = new Plot("Periodicity", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, intensityPlot, periodicityPlot
                };
                debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
            }

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage));
        } //Analysis()

        public static System.Tuple<List<double[]>, double[,], List<AcousticEvent>> DetectKiwi(BaseSonogram sonogram, int minHz, int maxHz,
                                                                                              /* double dctDuration, double dctThreshold, */ double minPeriod, double maxPeriod, double eventThreshold, double minDuration, double maxDuration)
        {
            int step         = (int)Math.Round(sonogram.FramesPerSecond); //take one second steps
            int sampleLength = 32;                                        //32 frames = 1.85 seconds. 64 frames (i.e. 3.7 seconds) is too long a sample - requires stationarity.

            int    rowCount       = sonogram.Data.GetLength(0);
            int    colCount       = sonogram.Data.GetLength(1);
            double minFramePeriod = minPeriod * sonogram.FramesPerSecond;
            double maxFramePeriod = maxPeriod * sonogram.FramesPerSecond;
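            // illustrative values only: if minPeriod = 0.5 s and FramesPerSecond ≈ 17.2 (see the table below), minFramePeriod ≈ 8.6 frames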

            int minBin = (int)(minHz / sonogram.FBinWidth);
            int maxBin = (int)(maxHz / sonogram.FBinWidth);


            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz
            double[] fullArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, (rowCount - 1), minBin + 130);
            var      result1   = CrossCorrelation.DetectXcorrelationInTwoArrays(fullArray, fullArray, step, sampleLength, minFramePeriod, maxFramePeriod);

            double[] intensity1   = result1.Item1;
            double[] periodicity1 = result1.Item2;
            intensity1 = DataTools.filterMovingAverage(intensity1, 11);

            //#############################################################################################################################################
            //double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, (rowCount - 1), minBin + 65);
            //double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin+66, (rowCount - 1), minBin+130);
            //int actualMaxHz     = (int)Math.Round((minBin+130) * sonogram.FBinWidth);
            //var result2 = CrossCorrelation.DetectXcorrelationInTwoArrays(lowerArray, upperArray, step, sampleLength, minFramePeriod, maxFramePeriod);
            //double[] intensity2   = result2.Item1;
            //double[] periodicity2 = result2.Item2;
            //intensity2 = DataTools.filterMovingAverage(intensity2, 5);

            //#############################################################################################################################################
            //minFramePeriod = 4;
            //maxFramePeriod = 14;
            //var return3 = Gratings.ScanArrayForGratingPattern(fullArray, (int)minFramePeriod, (int)maxFramePeriod, 4, step);
            //var return3 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 4);
            //var return4 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 5);
            //var return5 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 8);
            //var return6 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 10);
            //var return7 = Gratings.ScanArrayForGratingPattern(fullArray, step, 4, 12);

            //#############################################################################################################################################
            //bool normaliseDCT = true;
            //Double[,] maleHits;                       //predefinition of hits matrix - to superimpose on sonogram image
            //double[] maleScores;                      //predefinition of score array
            //double[] maleOscRate;
            //List<AcousticEvent> predictedMaleEvents;
            //double minOscilFreq = 1 / maxPeriod;  //convert max period (seconds) to oscilation rate (Herz).
            //double maxOscilFreq = 1 / minPeriod;  //convert min period (seconds) to oscilation rate (Herz).
            //OscillationAnalysis.Execute((SpectralSonogram)sonogram, minHz, maxHz, dctDuration, dctThreshold, normaliseDCT,
            //                             minOscilFreq, maxOscilFreq, eventThreshold, minDuration, maxDuration,
            //                             out maleScores, out predictedMaleEvents, out maleHits, out maleOscRate);

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            List <AcousticEvent> events = AcousticEvent.ConvertScoreArray2Events(intensity1, minHz, maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth,
                                                                                 eventThreshold, minDuration, maxDuration);

            CropEvents(events, fullArray, minDuration);
            CalculateAvIntensityScore(events, intensity1);
            CalculateDeltaPeriodScore(events, periodicity1, minFramePeriod, maxFramePeriod);
            CalculateBandWidthScore(events, sonogram.Data);
            CalculatePeaksScore(events, fullArray);
            //FilterEvents(events);
            CalculateWeightedEventScore(events);

            // PREPARE HITS MATRIX
            var    hits  = new double[rowCount, colCount];
            double range = maxFramePeriod - minFramePeriod;

            for (int r = 0; r < rowCount; r++)
            {
                if (intensity1[r] > eventThreshold)
                {
                    for (int c = minBin; c < maxBin; c++)
                    {
                        hits[r, c] = (periodicity1[r] - minFramePeriod) / range; //normalisation
                    }
                }
            }

            periodicity1 = CropArrayToEvents(events, periodicity1); //for display only

            var scores = new List <double[]>();

            scores.Add(DataTools.normalise(fullArray));
            //scores.Add(DataTools.normalise(upperArray));
            //scores.Add(DataTools.normalise(lowerArray));
            scores.Add(DataTools.normalise(intensity1));
            scores.Add(DataTools.normalise(periodicity1));
            //scores.Add(DataTools.normalise(intensity2));
            //scores.Add(DataTools.normalise(return3));
            //scores.Add(DataTools.normalise(return4));
            //scores.Add(DataTools.normalise(return5));
            //scores.Add(DataTools.normalise(return6));
            //scores.Add(DataTools.normalise(return7));
            //scores.Add(DataTools.normalise(maleScores));
            //scores.Add(DataTools.normalise(maleOscRate));
            return(System.Tuple.Create(scores, hits, events));
        }
Example #6
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            var recognizerConfig = new CriniaRemotaConfig();

            recognizerConfig.ReadConfigFile(configuration);

            // BETTER TO SET THESE. IGNORE USER!
            // this default framesize seems to work
            const int    frameSize     = 256;
            const double windowOverlap = 0.25;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),

                // if noise reduction is not used, the recogniser can be more sensitive.
                //NoiseReductionType = NoiseReductionType.None
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr               = recording.SampleRate;
            double   freqBinWidth     = sr / (double)sonoConfig.WindowSize;
            int      minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int      maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var      decibelThreshold = 6.0;
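            // illustrative arithmetic only: with sr = 22050 and frameSize = 256, freqBinWidth = 22050 / 256 ≈ 86.1 Hz per bin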

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);
            double[] topBand        = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, maxBin + 3, rowCount - 1, maxBin + 9);
            double[] botBand        = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin - 3, rowCount - 1, minBin - 9);
            double[] diffArray      = new double[amplitudeArray.Length];
            for (int i = 0; i < amplitudeArray.Length; i++)
            {
                diffArray[i] = amplitudeArray[i] - topBand[i] - botBand[i];
                if (diffArray[i] < 1.0)
                {
                    diffArray[i] = 0.0;
                }
            }

            bool[] peakArray = new bool[amplitudeArray.Length];
            for (int i = 1; i < diffArray.Length - 1; i++)
            {
                if (diffArray[i] < decibelThreshold)
                {
                    continue;
                }

                if (diffArray[i] > diffArray[i - 1] && diffArray[i] > diffArray[i + 1])
                {
                    peakArray[i] = true;
                }
            }

            // calculate score array based on density of peaks
            double frameDuration = (double)frameSize / sr;

            // use a stimulus-decay function
            double durationOfDecayTail = 0.35; // seconds
            int    lengthOfDecayTail   = (int)Math.Round(durationOfDecayTail / frameDuration);
            double decayrate           = 0.95;

            //double decay = -0.05;
            //double fractionalDecay = Math.Exp(decay * lengthOfDecayTail);
            // the above setting gives decay of 0.22 over 0.35 seconds or 30 frames.
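            // illustrative check (assuming sr = 22050, so frameDuration ≈ 0.0116 s): lengthOfDecayTail ≈ 30 frames and 0.95^30 ≈ 0.21,
            // i.e. the score decays to about 21% of its value over the 0.35 second tail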

            double score = 0.0;
            int    locationOfLastPeak = 0;

            double[] peakScores = new double[amplitudeArray.Length];
            for (int p = 0; p < peakScores.Length - 1; p++)
            {
                if (!peakArray[p])
                {
                    int distanceFromLastpeak = p - locationOfLastPeak;

                    // score decay
                    score *= decayrate;

                    // remove the decay tail
                    if (score < 0.5 && distanceFromLastpeak > lengthOfDecayTail && p >= lengthOfDecayTail)
                    {
                        score = 0.0;
                        for (int j = 0; j < lengthOfDecayTail; j++)
                        {
                            peakScores[p - j] = score;
                        }
                    }
                }
                else
                {
                    locationOfLastPeak = p;
                    score += 0.8;
                }

                peakScores[p] = score;
            }

            var events = AcousticEvent.ConvertScoreArray2Events(
                peakScores,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            double[,] hits = null;

            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in events)
            {
                if (ae.EventDurationSeconds < recognizerConfig.MinDuration || ae.EventDurationSeconds > recognizerConfig.MaxDuration)
                {
                    continue;
                }

                // add additional info
                ae.SpeciesName            = recognizerConfig.SpeciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // do a recognizer test.
            if (MainEntry.InDEBUG)
            {
                // var testDir = new DirectoryInfo(outputDirectory.Parent.Parent.FullName);
                // TestTools.RecognizerScoresTest(recording.BaseName, testDir, recognizerConfig.AnalysisName, peakScores);
                // AcousticEvent.TestToCompareEvents(recording.BaseName, testDir, recognizerConfig.AnalysisName, prunedEvents);
            }

            var plot = new Plot(this.DisplayName, peakScores, recognizerConfig.EventThreshold);

            // debug toggle: set true to draw additional debug score plots
            bool displayDebugImage = false;
            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;
                DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);
                DataTools.Normalise(diffArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var diffPlot = new Plot("Diff plot", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    plot, amplPlot, diffPlot
                };

                // NOTE: This DrawDebugImage() method can be overridden in this class.
                var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
                var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                debugImage.Save(debugPath);
            }

            return(new RecognizerResults
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = prunedEvents,

                //Events = events
            });
        } // Recognize()
Example #7
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaBicolorConfig lbConfig,
            bool drawDebugImage,
            TimeSpan segmentStartOffset)
        {
            double decibelThreshold   = lbConfig.DecibelThreshold; //dB
            double intensityThreshold = lbConfig.IntensityThreshold;

            //double eventThreshold = lbConfig.EventThreshold; //in 0-1

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 3 * lbConfig.MaxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);
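            // illustrative values only: if lbConfig.MaxPeriod = 0.3 s and framesPerSecond ≈ 86, then dctDuration = 0.9 s and dctLength ≈ 77 frames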

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

            // Could smooth here rather than above. Above seemed slightly better?
            amplitudeScores  = DataTools.filterMovingAverage(amplitudeScores, 7);
            differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lbConfig.LowerBandMinHz,
                lbConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                lbConfig.MinDuration,
                lbConfig.MaxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // init the score array
            double[] scores = new double[rowCount];

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedEvents)
            {
                //rowtop,  rowWidth
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);

                    bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    // lay down score for sample length
                    for (int j = 0; j < dctLength; j++)
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = "L.b";
                    ae.Score_MaxInEvent = maximumIntensity;
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################

            // prepare the plot of the oscillation intensity scores
            var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

            // DEBUG IMAGE for this recognizer only; drawn only when drawDebugImage is true.
            Image debugImage = null;

            if (drawDebugImage)
            {
                // display a variety of debug score arrays

                //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
                DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };

                // other debug plots
                //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
                debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return a new sonogram because it makes the image easier to interpret
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // omitting noise reduction gives a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
Example #8
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            string       speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string       abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
            const int    frameSize     = 256;
            const double windowOverlap = 0.0;

            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // duration of DCT in seconds
            //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min decibel amplitude for an acceptable event
            double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName             = recording.BaseName,
                WindowSize              = frameSize,
                WindowOverlap           = windowOverlap,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    recordingDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth) + 1;

            // duration of DCT in seconds - should be several times (here 5x) the expected maximum period
            double framesPerSecond = freqBinWidth;
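            // (with zero window overlap, frames/sec = sr / frameSize = freqBinWidth, hence the assignment above)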
            double minPeriod       = 1 / (double)maxOscilFreq;
            double maxPeriod       = 1 / (double)minOscilFreq;
            double dctDuration     = 5 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);
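            // Worked example (the oscillation bound is an illustrative assumption; sr = 22050 and frameSize = 256 are fixed above):
            // framesPerSecond = 22050 / 256 ≈ 86.1. If maxOscilFreq were 8 Hz, then maxPeriod = 0.125 s,
            // dctDuration = 5 * 0.125 = 0.625 s and dctLength = round(86.1 * 0.625) ≈ 54 frames.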

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // remove baseline from amplitude array
            var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

            // remove high-frequency content from the amplitude array
            var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);
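            // Assumed behaviour of the two filters: SubtractBaseline(x, 7) removes a local 7-frame baseline estimate
            // (a crude high-pass), while filterMovingAverageOdd(x, 11) is a centred 11-frame moving average
            // (a low-pass smoother), roughly y[i] = mean(x[i-5 .. i+5]).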

            var       dctScores = new double[highPassFilteredSignal.Length];
            const int step      = 2;

            for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
            {
                if (highPassFilteredSignal[i] < decibelThreshold)
                {
                    continue;
                }

                double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

                // Look for oscillations in the highPassFilteredSignal
                Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
                bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                if (!periodWithinBounds)
                {
                    continue;
                }

                if (intensity < dctThreshold)
                {
                    continue;
                }

                //lay down score for sample length
                for (int j = 0; j < dctLength; j++)
                {
                    if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
                    {
                        dctScores[i + j] = intensity;
                    }
                }
            }
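            // dctScores now holds, per frame, the maximum intensity of any qualifying DCT window covering it
            // (period within bounds and intensity >= dctThreshold), but only where the low-pass signal exceeds decibelThreshold.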

            //iii: CONVERT DCT OSCILLATION SCORES TO ACOUSTIC EVENTS
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                dctScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // ######################################################################
            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
            });

            var plot  = new Plot(this.DisplayName, dctScores, eventThreshold);
            var plots = new List <Plot> {
                plot
            };

            // DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
                DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

                DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    ampltdPlot, lowPassPlot, demeanedPlot, plot
                };
                Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null);
                var   debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = null,
                Plots = plots,
                Events = acousticEvents,
            });
        } //Recognize()

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// Returns a tuple of (sonogram, hits matrix, scores, acoustic events, recording duration).
        /// </summary>
        /// <param name="fiSegmentOfSourceFile"></param>
        /// <param name="configDict"></param>
        /// <param name="segmentStartOffset"></param>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - ignore those set by the user
            int    frameSize     = 1024;
            double windowOverlap = 0.0;

            int    upperBandMinHz     = int.Parse(configDict[KeyUpperfreqbandBtm]);
            int    upperBandMaxHz     = int.Parse(configDict[KeyUpperfreqbandTop]);
            int    lowerBandMinHz     = int.Parse(configDict[KeyLowerfreqbandBtm]);
            int    lowerBandMaxHz     = int.Parse(configDict[KeyLowerfreqbandTop]);
            double decibelThreshold   = double.Parse(configDict[KeyDecibelThreshold]);   //dB
            double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold]); //in 0-1
            double minDuration        = double.Parse(configDict[KeyMinDuration]);        // seconds
            double maxDuration        = double.Parse(configDict[KeyMaxDuration]);        // seconds
            double minPeriod          = double.Parse(configDict[KeyMinPeriod]);          // seconds
            double maxPeriod          = double.Parse(configDict[KeyMaxPeriod]);          // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName   = recording.BaseName;
            sonoConfig.WindowSize    = frameSize;
            sonoConfig.WindowOverlap = windowOverlap;
            //sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("NONE");
            sonoConfig.NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD");
            TimeSpan tsRecordingDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = freqBinWidth;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the cross-correlation/FFT technique requires the number of bins scanned to be a power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, (rowCount - 1), lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, (rowCount - 1), upperBandMaxBin);

            int step         = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount    = rowCount / step;
            int sampleLength = 64;                               //64 frames = 3.7 seconds. Suitable for Lewin's Rail.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if ((lowerSubarray.Length != sampleLength) || (upperSubarray.Length != sampleLength))
                {
                    break;
                }
                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 3;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;                                  //in real data these bins are dominant and hide other frequency content
                }
                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if ((period < minPeriod) || (period > maxPeriod))
                {
                    continue;
                }
                for (int j = 0; j < sampleLength; j++) //lay down score for sample length
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }
                    periodicity[start + j] = period;
                }
            }
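            // Worked example of the period formula above (sr is a config assumption; the table comment assumes 17640 Hz,
            // i.e. framesPerSecond ≈ 17.2): if maxId = 16, then period = (2 * 64 / 16) / 17.2 ≈ 0.47 s,
            // and spectrum[maxId] becomes the periodicity intensity laid down over those 64 frames.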
            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray);
            var hits = new double[rowCount, colCount];

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, tsRecordingDuration));
        } //Analysis()
Example #10
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            // common properties
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);
            // duration of DCT in seconds
            //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

            // minimum acceptable value of a DCT coefficient
            //double dctThreshold = (double)configuration[AnalysisKeys.DctThreshold];
            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
            double decibelThreshold        = configuration.GetDouble("DecibelThreshold");

            //double minPeriod = (double)configuration["MinPeriod"]; //: 0.18
            //double maxPeriod = (double)configuration["MaxPeriod"]; //

            //int maxOscilRate = (int)Math.Ceiling(1 /minPeriod);
            //int minOscilRate = (int)Math.Floor(1 /maxPeriod);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            var maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            // this default frame size and overlap work best for the White Heron of Bhutan.
            const int frameSize     = 2048;
            double    windowOverlap = 0.0;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    recordingDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth) + 1;

            /* #############################################################################################################################################
             * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
             * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
             * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
             * 2048     17640      116.1ms           8.6         8.6    7430ms           551hz          1100hz
             * 2048     22050       92.8ms          10.8        10.8    5944ms
             */

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            // var templates = GetTemplatesForAlgorithm1(14);
            var amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            bool[] peakArray       = new bool[rowCount];
            var    amplitudeScores = new double[rowCount];
            var    hits            = new double[rowCount, colCount];

            const int maxTemplateLength  = 20;
            const int templateEndPadding = 7;
            const int templateOffset     = 14;
            const int minimumGap         = 4;
            const int maximumGap         = 100;

            // first find the amplitude peaks
            for (int j = 1; j < amplitudeArray.Length - 1; j++)
            {
                if (amplitudeArray[j] < decibelThreshold)
                {
                    continue;
                }

                if (amplitudeArray[j] > amplitudeArray[j - 1] && amplitudeArray[j] > amplitudeArray[j + 1])
                {
                    peakArray[j] = true;
                }
            }
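            // peakArray now flags frames that are local maxima of the band-averaged amplitude and exceed decibelThreshold.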

            // get template for end of heron call
            var endTemplate = GetEndTemplateForAlgorithm2();

            // now search for peaks that are the correct distance apart.
            for (int i = 2; i < amplitudeArray.Length - maxTemplateLength - templateEndPadding; i++)
            {
                if (!peakArray[i])
                {
                    continue;
                }

                // calculate distance to next peak
                int distanceToNextPeak = CalculateDistanceToNextPeak(peakArray, i);

                // skip gaps that are too small or too large
                if (distanceToNextPeak < minimumGap || distanceToNextPeak > maximumGap)
                {
                    continue;
                }

                // The heron call ends with a rising whip
                // Check end of call using end template
                if (distanceToNextPeak > maxTemplateLength)
                {
                    int start = i - templateOffset;
                    if (start < 0)
                    {
                        start = 0;
                    }

                    var    endLocality = DataTools.Subarray(amplitudeArray, start, endTemplate.Length);
                    double endScore    = DataTools.CosineSimilarity(endLocality, endTemplate);
                    for (int to = -templateOffset; to < endTemplate.Length - templateOffset; to++)
                    {
                        if (i + to >= 0 && endScore > amplitudeScores[i + to])
                        {
                            amplitudeScores[i + to] = endScore;

                            // hits[i, minBin] = 10;
                        }
                    }

                    for (int k = 2; k < maxTemplateLength; k++)
                    {
                        amplitudeScores[i + k] = 0.0;
                    }

                    continue;
                }

                // Get the start template which depends on distance to next peak.
                var startTemplate = GetTemplateForAlgorithm2(distanceToNextPeak, templateEndPadding);

                // now calculate similarity of locality with the startTemplate
                var    locality = DataTools.Subarray(amplitudeArray, i - 2, startTemplate.Length); // i-2 because first two places should be zero.
                double score    = DataTools.CosineSimilarity(locality, startTemplate);
                for (int t = 0; t < startTemplate.Length; t++)
                {
                    if (score > amplitudeScores[i + t])
                    {
                        amplitudeScores[i + t] = score;
                        hits[i, minBin]        = 10;
                    }
                }
            } // loop over peak array
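            // Note: DataTools.CosineSimilarity is assumed to return the normalised dot product, dot(a, b) / (|a| * |b|),
            // so a score near 1.0 means the local amplitude shape closely matches the template (independent of scale).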

            var smoothedScores = DataTools.filterMovingAverageOdd(amplitudeScores, 3);

            // iii: CONVERT TEMPLATE SIMILARITY SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                smoothedScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in predictedEvents)
            {
                if (ae.EventDurationSeconds < minDuration)
                {
                    continue;
                }

                // add additional info
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // do a recognizer test.
            //CompareArrayWithBenchmark(scores, new FileInfo(recording.FilePath));
            //CompareArrayWithBenchmark(prunedEvents, new FileInfo(recording.FilePath));

            var plot = new Plot(this.DisplayName, amplitudeScores, eventThreshold);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = prunedEvents,
            });
        }
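
        /// <summary>
        /// Sketch only: the helper CalculateDistanceToNextPeak called above is not included in this listing.
        /// The implementation below is an assumed minimal version consistent with how it is used,
        /// not the library's actual code.
        /// </summary>
        private static int CalculateDistanceToNextPeak(bool[] peakArray, int currentIndex)
        {
            // scan forward from the current peak to the next flagged peak and return the gap in frames
            for (int i = currentIndex + 1; i < peakArray.Length; i++)
            {
                if (peakArray[i])
                {
                    return i - currentIndex;
                }
            }

            // no later peak: return a gap larger than any plausible maximum
            return peakArray.Length - currentIndex;
        }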