/// <summary>
        /// Collapses an event matrix into a vector of column sums and returns the best cosine similarity
        /// between that vector and any of the supplied templates.
        /// </summary>
        /// <param name="eventMatrix">Matrix describing the event; it is reduced with <c>MatrixTools.SumColumns</c>.</param>
        /// <param name="templates">Candidate template vectors to compare against.</param>
        /// <returns>The largest similarity found. Stays at <c>-double.MaxValue</c> when no template produces a larger value (for example an empty list).</returns>
        public static double GetEventScore(double[,] eventMatrix, List <double[]> templates)
        {
            // Templates start at the high frequency end (the fixed reference bin), so the summed vector is flipped to line up with them.
            double[] profile = DataTools.reverseArray(MatrixTools.SumColumns(eventMatrix));

            double best = -double.MaxValue;
            for (int index = 0; index < templates.Count; index++)
            {
                double similarity = DataTools.CosineSimilarity(templates[index], profile);

                // strict comparison: the first of several equally good templates wins
                if (similarity > best)
                {
                    best = similarity;
                }
            }

            return best;
        }
        /// <summary>
        /// Finds the template with the highest cosine similarity to an event vector.
        /// </summary>
        /// <param name="eventAsVector">The event vector; it is reversed before comparison because the templates start at the high frequency end.</param>
        /// <param name="templates">Candidate template vectors.</param>
        /// <param name="score">Receives the square of the best similarity.</param>
        /// <param name="id">Receives the one-based position of the best template, or 0 when no template was selected.</param>
        private static void GetEventScore(double[] eventAsVector, List <double[]> templates, out double score, out int id)
        {
            // Flip the vector so that it starts at the fixed reference bin, as the templates do.
            double[] reversed = DataTools.reverseArray(eventAsVector);
            double   best     = -double.MaxValue;

            id = 0;
            int position = 0;
            foreach (double[] candidate in templates)
            {
                // positions are reported one-based, so advance the counter before it is used
                position++;

                double similarity = DataTools.CosineSimilarity(candidate, reversed);
                if (similarity > best)
                {
                    best = similarity;
                    id   = position;
                }
            }

            // squaring increases the contrast between scores
            score = best * best;
        }
        /// <summary>
        /// Algorithm2:
        /// 1: Loop through spgm and find dominant freq bin and its amplitude in each frame
        /// 2: If frame passes amplitude test, then calculate a similarity cosine score for that frame. Similarity score is wrt a template matrix.
        /// 3: If similarity score exceeds threshold, then assign event score based on the amplitude.
        /// </summary>
        /// <param name="recording">The recording to analyse; its WavReader and SampleRate are used to build and scale the spectrogram.</param>
        /// <param name="configuration">Read for "BgNoiseThreshold" (default 0.1), "EventDecibelThreshold" (default 6.0), "EventSimilarityThreshold" (default 0.2) and "DominantFrequency" (no default is supplied here).</param>
        /// <param name="outputDirectory">Directory that receives the debug spectrogram image when <c>MainEntry.InDEBUG</c> is set.</param>
        /// <param name="segmentStartOffset">Start offset of the segment; it is passed to every event that is created.</param>
        /// <returns>The detected events together with the hit matrix, the normalised amplitude-score plot and the spectrogram.</returns>
        internal RecognizerResults Algorithm2(AudioRecording recording, Config configuration, DirectoryInfo outputDirectory, TimeSpan segmentStartOffset)
        {
            double noiseReductionParameter = configuration.GetDoubleOrNull("BgNoiseThreshold") ?? 0.1;

            // make a spectrogram
            var config = new SonogramConfig
            {
                WindowSize              = 256,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
                WindowOverlap           = 0.0,
            };

            // now construct the standard decibel spectrogram WITH noise removal
            // get frame parameters for the analysis
            var sonogram = (BaseSonogram) new SpectrogramStandard(config, recording.WavReader);

            // remove the DC column
            var spg = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

            sonogram.Data = spg;
            int sampleRate = recording.SampleRate;
            int rowCount   = spg.GetLength(0);
            int colCount   = spg.GetLength(1);

            //double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
            // the frame size is recovered from the number of frequency bins left after the DC column was removed
            int frameSize = colCount * 2;
            int frameStep = frameSize; // this default = zero overlap

            //double frameDurationInSeconds = frameSize / (double)sampleRate;
            double frameStepInSeconds = frameStep / (double)sampleRate;
            double framesPerSec       = 1 / frameStepInSeconds;
            double herzPerBin         = sampleRate / 2.0 / colCount;

            //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // ## THREE THRESHOLDS ---- only one of these is given to user.
            // minimum dB to register a dominant freq peak. After noise removal
            double peakThresholdDb = 3.0;

            // The threshold dB amplitude in the dominant freq bin required to yield an event
            double eventDecibelThreshold = configuration.GetDoubleOrNull("EventDecibelThreshold") ?? 6.0;

            // minimum score for an acceptable event - that is when processing the score array.
            double eventSimilarityThreshold = configuration.GetDoubleOrNull("EventSimilarityThreshold") ?? 0.2;

            // IMPORTANT: The following frame durations assume a sampling rate = 22050 and window size of 512.
            // NOTE(review): the spectrogram above is built with WindowSize = 256, not 512 - confirm which value the durations below were tuned for.
            //int minFrameWidth = 2;
            //int maxFrameWidth = 5;  // this is larger than actual to accommodate an echo.
            //double minDuration = (minFrameWidth - 1) * frameStepInSeconds;
            //double maxDuration = maxFrameWidth * frameStepInSeconds;

            // minimum number of frames and bins covering the call
            // The PlatyplectrumOrnatum call has a duration of 3-5 frames GIVEN THE ABOVE SAMPLING and WINDOW SETTINGS!

            // Get the call templates and their dimensions
            var templates = GetTemplatesForAlgorithm2(out var callFrameDuration, out var callBinWidth);

            int dominantFrequency = configuration.GetInt("DominantFrequency");

            // the dominant bin of a frame must lie within hzBuffer Hertz of the configured dominant frequency
            const int hzBuffer       = 100;
            int       dominantBin    = (int)Math.Round(dominantFrequency / herzPerBin);
            int       binBuffer      = (int)Math.Round(hzBuffer / herzPerBin);
            int       dominantBinMin = dominantBin - binBuffer;
            int       dominantBinMax = dominantBin + binBuffer;
            int       bottomBin      = 1;
            int       topBin         = bottomBin + callBinWidth - 1;

            int[]    dominantBins     = new int[rowCount];    // predefinition of events max frequency
            double[] similarityScores = new double[rowCount]; // predefinition of score array
            double[] amplitudeScores  = new double[rowCount];
            double[,] hits = new double[rowCount, colCount];

            // loop through all spectra/rows of the spectrogram
            // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns= freq bins mark the hits in hitMatrix
            for (int s = 1; s < rowCount - callFrameDuration; s++)
            {
                double[] spectrum     = MatrixTools.GetRow(spg, s);
                double   maxAmplitude = -double.MaxValue;
                int      maxId        = 0;

                // loop through bandwidth of call and look for dominant frequency
                // (the search starts at the hard-coded bin 8 and stops at the upper limit of the allowed band)
                for (int binId = 8; binId <= dominantBinMax; binId++)
                {
                    if (spectrum[binId] > maxAmplitude)
                    {
                        maxAmplitude = spectrum[binId];
                        maxId        = binId;
                    }
                }

                // the strongest bin lies below the allowed band around the dominant frequency
                if (maxId < dominantBinMin)
                {
                    continue;
                }

                // peak should exceed threshold amplitude
                if (spectrum[maxId] < peakThresholdDb)
                {
                    continue;
                }

                //now calculate similarity with template
                var    locality      = MatrixTools.Submatrix(spg, s - 1, bottomBin, s + callFrameDuration - 2, topBin); // s-1 because first row of template is zeros.
                int    localMaxBin   = maxId - bottomBin;

                // call amplitude = mean of rows 1 to 3 of the locality, taken in the dominant bin
                double callAmplitude = (locality[1, localMaxBin] + locality[2, localMaxBin] + locality[3, localMaxBin]) / 3.0;

                // use the following lines to write out call templates for use as recognizer
                //double[] columnSums = MatrixTools.SumColumns(locality);
                //if (columnSums[maxId - bottomBin] < 80) continue;
                //FileTools.WriteMatrix2File(locality, "E:\\SensorNetworks\\Output\\Frogs\\TestOfRecognizers-2016October\\Towsey.PlatyplectrumOrnatum\\Locality_S"+s+".csv");

                // only the first template is used for scoring
                double score = DataTools.CosineSimilarity(locality, templates[0]);
                if (score > eventSimilarityThreshold)
                {
                    similarityScores[s] = score;
                    dominantBins[s]     = maxId;
                    amplitudeScores[s]  = callAmplitude;
                }
            } // loop through all spectra

            // loop through all spectra/rows of the spectrogram for a second time
            // NB: the spectrogram is rotated to vertical, i.e. rows = spectra, columns= freq bins
            // We now have a list of potential hits. This needs to be filtered. Mark the hits in hitMatrix
            var events = new List <AcousticEvent>();

            for (int s = 1; s < rowCount - callFrameDuration; s++)
            {
                // find peaks in the array of similarity scores. First step, only look for peaks
                if (similarityScores[s] < similarityScores[s - 1] || similarityScores[s] < similarityScores[s + 1])
                {
                    continue;
                }

                // require three consecutive similarity scores to be above the threshold
                if (similarityScores[s + 1] < eventSimilarityThreshold || similarityScores[s + 2] < eventSimilarityThreshold)
                {
                    continue;
                }

                // now check the amplitude
                if (amplitudeScores[s] < eventDecibelThreshold)
                {
                    continue;
                }

                // have an event
                // find average dominant bin for the event, i.e. over the frames s, s+1 and s+2 whose similarity scores were checked above.
                // With a positive similarity threshold each of those frames had its dominant bin recorded in the first pass.
                int avDominantBin      = (dominantBins[s] + dominantBins[s + 1] + dominantBins[s + 2]) / 3;
                int avDominantFreq     = (int)Math.Round(avDominantBin * herzPerBin);
                int topBinForEvent     = avDominantBin + 3;
                int bottomBinForEvent  = topBinForEvent - callBinWidth;
                int topFreqForEvent    = (int)Math.Round(topBinForEvent * herzPerBin);
                int bottomFreqForEvent = (int)Math.Round(bottomBinForEvent * herzPerBin);

                hits[s, avDominantBin] = 10;

                // every event is given a fixed duration of four frames
                double startTime    = s * frameStepInSeconds;
                double durationTime = 4 * frameStepInSeconds;
                var    newEvent     = new AcousticEvent(segmentStartOffset, startTime, durationTime, bottomFreqForEvent, topFreqForEvent)
                {
                    DominantFreq = avDominantFreq,
                    Score        = amplitudeScores[s],

                    // remove name because it hides spectral content in display of the event.
                    Name = string.Empty,
                };
                newEvent.SetTimeAndFreqScales(framesPerSec, herzPerBin);

                events.Add(newEvent);
            } // loop through all spectra

            // display the amplitude scores
            DataTools.Normalise(amplitudeScores, eventDecibelThreshold, out var normalisedScores, out var normalisedThreshold);
            var plot  = new Plot(this.DisplayName, normalisedScores, normalisedThreshold);
            var plots = new List <Plot> {
                plot
            };

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display the original decibel score array
                var debugPlot  = new Plot("Similarity Score", similarityScores, eventSimilarityThreshold);
                var debugPlots = new List <Plot> {
                    plot, debugPlot
                };
                var debugImage = DisplayDebugImage(sonogram, events, debugPlots, hits);
                var debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            // add names into the returned events (done after the debug image so the names do not hide spectral content there)
            foreach (var ae in events)
            {
                ae.Name = "P.o"; // abbreviatedSpeciesName;
            }

            return(new RecognizerResults()
            {
                Events = events,
                Hits = hits,
                Plots = plots,
                Sonogram = sonogram,
            });
        }
// Example #4
// 0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// Frames whose average amplitude in the band between MinHz and MaxHz is a local maximum at or above "DecibelThreshold" are marked as peaks.
        /// Each peak is then scored by cosine similarity against either a start template (chosen from the distance to the next peak)
        /// or, when the next peak is further away than the longest start template, the end-of-call template.
        /// The score array is smoothed and converted to acoustic events.
        /// </summary>
        /// <param name="recording">The audio segment to analyse.</param>
        /// <param name="configuration">Supplies the species names, MinHz, MaxHz, "SeverityOfNoiseRemoval" (default 2.0), "DecibelThreshold", MinDuration, MaxDuration and EventThreshold.</param>
        /// <param name="segmentStartOffset">Start offset of the segment; it is stored on every returned event.</param>
        /// <param name="getSpectralIndexes">Not used by this implementation.</param>
        /// <param name="outputDirectory">Not used by this implementation.</param>
        /// <param name="imageWidth">Not used by this implementation.</param>
        /// <returns>The spectrogram, the hit matrix, a plot of the unsmoothed scores and the events that are at least MinDuration long.</returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            // this window size and overlap is best for the White Heron of Bhutan.
            const int    windowSize = 2048;
            const double overlap    = 0.0;

            // template geometry and the accepted spacing between peaks, all counted in frames
            const int maxTemplateLength  = 20;
            const int templateEndPadding = 7;
            const int templateOffset     = 14;
            const int minimumGap         = 4;
            const int maximumGap         = 100;

            // names that are attached to every reported event
            string fullName  = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
            string shortName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // frequency band that is searched
            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
            double peakDecibelThreshold    = configuration.GetDouble("DecibelThreshold");

            // shortest and longest event (seconds) and the minimum score of an acceptable event
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);
            var maxDuration    = configuration.GetDouble(AnalysisKeys.MaxDuration);
            var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            // i: MAKE SONOGRAM (the window function is left at its default)
            var sonoConfig = new SonogramConfig
            {
                SourceFName             = recording.BaseName,
                WindowSize              = windowSize,
                WindowOverlap           = overlap,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    segmentDuration = recording.Duration;
            int    sampleRate      = recording.SampleRate;
            double hzPerBin        = sampleRate / (double)sonoConfig.WindowSize;
            int    lowBin          = (int)Math.Round(minHz / hzPerBin) + 1;
            int    highBin         = (int)Math.Round(maxHz / hzPerBin) + 1;

            /* #############################################################################################################################################
             * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
             * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
             * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
             * 2048     17640      116.1ms           8.6         8.6    7430ms           551hz          1100hz
             * 2048     22050       92.8ms          21.5        10.7666 1472ms
             */

            BaseSonogram sonogram   = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          frameCount = sonogram.Data.GetLength(0);
            int          binCount   = sonogram.Data.GetLength(1);

            // one average amplitude per frame, taken over the band of interest
            var bandAmplitudes = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowBin, frameCount - 1, highBin);

            bool[] isPeak = new bool[frameCount];
            var    scores = new double[frameCount];
            var    hits   = new double[frameCount, binCount];

            // ii: LOCATE AMPLITUDE PEAKS - frames that reach the threshold and strictly exceed both neighbours
            for (int frame = 1; frame < bandAmplitudes.Length - 1; frame++)
            {
                var  current        = bandAmplitudes[frame];
                bool belowThreshold = current < peakDecibelThreshold;
                bool isLocalMaximum = current > bandAmplitudes[frame - 1] && current > bandAmplitudes[frame + 1];

                if (!belowThreshold && isLocalMaximum)
                {
                    isPeak[frame] = true;
                }
            }

            // template for the end of the heron call
            var endTemplate = GetEndTemplateForAlgorithm2();

            // iii: SCORE PEAKS that are the correct distance from the following peak
            int searchLimit = bandAmplitudes.Length - maxTemplateLength - templateEndPadding;
            for (int peak = 2; peak < searchLimit; peak++)
            {
                if (!isPeak[peak])
                {
                    continue;
                }

                int gap = CalculateDistanceToNextPeak(isPeak, peak);

                // ignore peaks whose successor is too close or too far away
                bool gapAcceptable = gap >= minimumGap && gap <= maximumGap;
                if (!gapAcceptable)
                {
                    continue;
                }

                if (gap > maxTemplateLength)
                {
                    // The heron call ends with a rising whip, so a peak with a distant successor is compared with the end template.
                    // The compared window starts templateOffset frames before the peak, clamped to the start of the array.
                    int    windowStart = peak < templateOffset ? 0 : peak - templateOffset;
                    var    endLocality = DataTools.Subarray(bandAmplitudes, windowStart, endTemplate.Length);
                    double endScore    = DataTools.CosineSimilarity(endLocality, endTemplate);

                    // spread the score over the unclamped template span, never lowering a score that is already there
                    int spanStart = peak - templateOffset;
                    int spanEnd   = spanStart + endTemplate.Length;
                    for (int frame = spanStart; frame < spanEnd; frame++)
                    {
                        if (frame >= 0 && endScore > scores[frame])
                        {
                            scores[frame] = endScore;
                        }
                    }

                    // reset the scores from two frames after the peak up to the maximum template length
                    for (int frame = peak + 2; frame < peak + maxTemplateLength; frame++)
                    {
                        scores[frame] = 0.0;
                    }
                }
                else
                {
                    // the start template depends on the distance to the next peak
                    var startTemplate = GetTemplateForAlgorithm2(gap, templateEndPadding);

                    // the compared window starts two frames before the peak because the first two places of the template should be zero
                    var    locality = DataTools.Subarray(bandAmplitudes, peak - 2, startTemplate.Length);
                    double score    = DataTools.CosineSimilarity(locality, startTemplate);

                    int spanEnd = peak + startTemplate.Length;
                    for (int frame = peak; frame < spanEnd; frame++)
                    {
                        if (score > scores[frame])
                        {
                            scores[frame]     = score;
                            hits[peak, lowBin] = 10;
                        }
                    }
                }
            } // loop over peaks

            // iv: CONVERT SMOOTHED SCORES TO ACOUSTIC EVENTS
            var smoothedScores = DataTools.filterMovingAverageOdd(scores, 3);
            var candidates     = AcousticEvent.ConvertScoreArray2Events(
                smoothedScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                hzPerBin,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // keep only candidates that are long enough and attach the descriptive information to them
            var acceptedEvents = new List<AcousticEvent>();
            foreach (var candidate in candidates)
            {
                bool tooShort = candidate.EventDurationSeconds < minDuration;
                if (tooShort)
                {
                    continue;
                }

                candidate.SpeciesName            = fullName;
                candidate.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                candidate.SegmentDurationSeconds = segmentDuration.TotalSeconds;
                candidate.Name                   = shortName;
                acceptedEvents.Add(candidate);
            }

            // the plot shows the unsmoothed scores against the event threshold
            var scorePlot = new Plot(this.DisplayName, scores, eventThreshold);

            return new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits     = hits,
                Plots    = scorePlot.AsList(),
                Events   = acceptedEvents,
            };
        }