/// <summary>
        /// Analyses one audio segment for Litoria caerulea calls. This method is called once per segment
        /// (typically one-minute segments).
        /// The steps are: build a noise-reduced spectrogram, score each frame whose spectral maximum lies
        /// inside the dominant-frequency sub-band, convert those scores to candidate croak events, look for
        /// oscillations in the resulting croak score array, and convert the oscillation scores to the final events.
        /// A debug spectrogram image is always written to <paramref name="outputDirectory"/>.
        /// </summary>
        /// <param name="recording">The audio segment to analyse.</param>
        /// <param name="configuration">Recognizer configuration; read into a <c>LitoriaCaeruleaConfig</c> and queried for the species name.</param>
        /// <param name="segmentStartOffset">Start offset of this segment; passed to event construction and stored on every event.</param>
        /// <param name="getSpectralIndexes">Not used by this recognizer.</param>
        /// <param name="outputDirectory">Directory in which the debug spectrogram image is saved.</param>
        /// <param name="imageWidth">Not used by this recognizer.</param>
        /// <returns>The spectrogram, a null hits matrix, the oscillation score plot and the detected events.</returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            var recognizerConfig = new LitoriaCaeruleaConfig();
            recognizerConfig.ReadConfigFile(configuration);

            // common properties
            string speciesName = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";

            // BETTER TO SET THESE. IGNORE USER!
            // This framesize is large because the oscillation we wish to detect is due to repeated croaks
            // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
            const int    frameSize     = 2048;
            const double windowOverlap = 0.5;

            // i: MAKE SONOGRAM
            // The window function is left at its default; standard noise reduction is applied.
            var sonoConfig = new SonogramConfig
            {
                SourceFName             = recording.BaseName,
                WindowSize              = frameSize,
                WindowOverlap           = windowOverlap,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr                = recording.SampleRate;
            double   freqBinWidth      = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond   = sr / (sonoConfig.WindowSize * (1 - windowOverlap));

            // +1 skips over the first spectrogram column
            int    minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int    maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            double decibelThreshold = 9.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: SCORE INDIVIDUAL FRAMES
            int rowCount = sonogram.Data.GetLength(0);

            // get the freq band as set by min and max Herz
            var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
            // Skip over spectra where maximum is not in correct location.
            int buffer            = 150;
            var croakScoreArray   = new double[rowCount];
            var hzAtTopOfTopBand  = recognizerConfig.DominantFreq + buffer;
            var hzAtBotOfTopBand  = recognizerConfig.DominantFreq - buffer;

            // bin indices are relative to the start of frogBand, hence the subtraction of MinHz
            var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
            var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

            // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
            for (int x = 0; x < rowCount; x++)
            {
                var spectrum = MatrixTools.GetRow(frogBand, x);
                int maxIndex = DataTools.GetMaxIndex(spectrum);
                if (spectrum[maxIndex] < decibelThreshold)
                {
                    continue;
                }

                if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
                {
                    croakScoreArray[x] = spectrum[maxIndex];
                }
            }

            // Prepare a normalised plot for later display with spectrogram
            double[] normalisedScores;
            double   normalisedThreshold;

            DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var croakPlot1 = new Plot($"Croak scores (threshold={decibelThreshold})", normalisedScores, normalisedThreshold);

            // extract potential croak events from the array of croak candidates
            var croakEvents = AcousticEvent.ConvertScoreArray2Events(
                croakScoreArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinCroakDuration,
                recognizerConfig.MaxCroakDuration,
                segmentStartOffset);

            // add necessary info into the candidate events
            var prunedEvents = new List<AcousticEvent>();
            foreach (var ae in croakEvents)
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name                   = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // With those events that survive the above Array2Events process, we now extract a new array of croak scores
            croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
            DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var croakPlot2 = new Plot($"Croak events (threshold={decibelThreshold})", normalisedScores, normalisedThreshold);

            // ######################################################################
            // iii: LOOK FOR OSCILLATIONS IN THE CROAK SCORE ARRAY
            // duration of DCT in seconds
            double dctDuration = recognizerConfig.DctDuration;

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = recognizerConfig.DctThreshold;

            // periods (seconds) are converted to oscillation rates
            double minOscRate = 1 / recognizerConfig.MaxPeriod;
            double maxOscRate = 1 / recognizerConfig.MinPeriod;
            var    dctScores  = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

            // ######################################################################
            // iv: CONVERT OSCILLATION SCORES TO ACOUSTIC EVENTS
            var events = AcousticEvent.ConvertScoreArray2Events(
                dctScores,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            // this recognizer does not produce a hits matrix
            double[,] hits = null;
            prunedEvents = new List<AcousticEvent>();
            foreach (var ae in events)
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name                   = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

            // ######################################################################
            // v: WRITE THE DEBUG IMAGE (always produced)
            // display a variety of debug score arrays, including the summed amplitude of the frog band
            double[] amplitudeArray = MatrixTools.SumRows(frogBand);
            DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

            var debugPlots = new List<Plot> { scoresPlot, croakPlot2, croakPlot1, amplPlot };

            // NOTE: This DrawDebugImage() method can be over-written in this class.
            var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
            var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
            debugImage.Save(debugPath);

            return new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits     = hits,
                Plots    = scoresPlot.AsList(),
                Events   = prunedEvents,
            };
        }
Ejemplo n.º 2
0
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD.
        /// Loads the audio segment, builds a noise-reduced spectrogram with a fixed frame size of 128 and
        /// overlap of 0.5, then cross-correlates two frequency bins (around 1500 Hz and four bins above it)
        /// over sliding windows of 32 frames. Windows whose dominant period lies in the configured range
        /// contribute an intensity score, which is finally converted to acoustic events.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">The audio file containing the segment to analyse.</param>
        /// <param name="configDict">
        /// Configuration values. Must contain the keys INTENSITY_THRESHOLD, MIN_DURATION, MAX_DURATION,
        /// MIN_PERIOD and MAX_PERIOD, each parseable as a double.
        /// </param>
        /// <param name="segmentStartOffset">Start offset of the segment; passed on to event construction and cropping.</param>
        /// <returns>
        /// A tuple of: the spectrogram, a hits matrix (allocated to the spectrogram's dimensions but never written to),
        /// a list holding the single "intensity" plot, the predicted events, and the recording duration.
        /// </returns>
        public static Tuple <BaseSonogram, double[, ], List <Plot>, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - ignore those set by user
            int    frameSize     = 128;
            double windowOverlap = 0.5;

            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double minDuration        = double.Parse(configDict["MIN_DURATION"]);        // seconds
            double maxDuration        = double.Parse(configDict["MAX_DURATION"]);        // seconds
            double minPeriod          = double.Parse(configDict["MIN_PERIOD"]);          // seconds
            double maxPeriod          = double.Parse(configDict["MAX_PERIOD"]);          // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameSize,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            }; //default values config

            TimeSpan recordingDuration = recording.Duration;
            int      sr                = recording.SampleRate;
            double   freqBinWidth      = sr / (double)sonoConfig.WindowSize;
            double   frameOffset       = sonoConfig.GetFrameOffset(sr);
            double   framesPerSecond   = 1 / frameOffset;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 256      17640       14.5ms          68.9        68.9    ms          hz          hz
            // 512      17640       29.0ms          34.4        34.4    ms          hz          hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            // The Xcorrelation-FFT technique requires the number of frames to span to be a power of 2.
            // The two bins that are cross-correlated are the bin containing midHz and the bin four above it.
            int midHz    = 1500;
            int lowerBin = (int)(midHz / freqBinWidth) + 1;  //because bin[0] = DC
            int upperBin = lowerBin + 4;
            int lowerHz  = (int)Math.Floor((lowerBin - 1) * freqBinWidth);
            int upperHz  = (int)Math.Ceiling((upperBin - 1) * freqBinWidth);

            double[] lowerArray = MatrixTools.GetColumn(sonogram.Data, lowerBin);
            double[] upperArray = MatrixTools.GetColumn(sonogram.Data, upperBin);
            lowerArray = DataTools.NormaliseInZeroOne(lowerArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB
            upperArray = DataTools.NormaliseInZeroOne(upperArray, 0, 60); //## ABSOLUTE NORMALISATION 0-60 dB

            // Advance in steps of one fortieth of a second. The step is clamped to at least one frame:
            // at frame rates below 40 fps the integer truncation would give 0 and the division below would throw.
            int step         = Math.Max(1, (int)(framesPerSecond / 40));
            int stepCount    = rowCount / step;
            int sampleLength = 32; // number of frames in each cross-correlation window

            double[] intensity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES

            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

                // stop once a full-length window can no longer be extracted
                if (lowerSubarray == null || upperSubarray == null)
                {
                    break;
                }

                if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
                {
                    break;
                }

                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 2;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if (period < minPeriod || period > maxPeriod)
                {
                    continue;
                }

                // lay down score for sample length, keeping the largest score seen for each frame
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }
                }
            }

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 3);
            intensity = DataTools.NormaliseInZeroOne(intensity, 0, 0.5); //## ABSOLUTE NORMALISATION 0-0.5

            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerHz,
                upperHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray, segmentStartOffset);

            // the hits matrix is returned empty (all zeros); nothing in this method writes to it
            var hits = new double[rowCount, colCount];

            var plots = new List <Plot>();
            plots.Add(new Plot("intensity", intensity, intensityThreshold));

            return(Tuple.Create(sonogram, hits, plots, predictedEvents, recordingDuration));
        } //Analysis()
Ejemplo n.º 3
0
        } //Analysis()

        // Scans the spectrogram for periodic content in the band starting at minHz, using auto-correlation of the
        // band's row averages. The smoothed correlation intensity is converted to acoustic events, which are then
        // cropped and scored by a sequence of helper methods.
        // Returns a tuple of: three normalised score arrays (band average, intensity, periodicity),
        // a hits matrix holding the normalised periodicity for frames above the event threshold, and the events.
        public static System.Tuple <List <double[]>, double[, ], List <AcousticEvent> > DetectKiwi(BaseSonogram sonogram, int minHz, int maxHz,
                                                                                                   /* double dctDuration, double dctThreshold, */ double minPeriod, double maxPeriod, double eventThreshold, double minDuration, double maxDuration)
        {
            // advance in one-second steps; each correlation window spans 32 frames
            int stepSize     = (int)Math.Round(sonogram.FramesPerSecond);
            int windowFrames = 32;

            int frameCount = sonogram.Data.GetLength(0);
            int binCount   = sonogram.Data.GetLength(1);

            // convert the period bounds from seconds to frames
            double minFramePeriod = minPeriod * sonogram.FramesPerSecond;
            double maxFramePeriod = maxPeriod * sonogram.FramesPerSecond;

            int minBin = (int)(minHz / sonogram.FBinWidth);
            int maxBin = (int)(maxHz / sonogram.FBinWidth);

            // average the band of 130 bins starting at minBin, then correlate that array with itself
            double[] bandAverages = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, frameCount - 1, minBin + 130);
            var      xcorr        = CrossCorrelation.DetectXcorrelationInTwoArrays(bandAverages, bandAverages, stepSize, windowFrames, minFramePeriod, maxFramePeriod);

            double[] intensityScores   = DataTools.filterMovingAverage(xcorr.Item1, 11);
            double[] periodicityScores = xcorr.Item2;

            // convert scores to acoustic events
            List <AcousticEvent> events = AcousticEvent.ConvertScoreArray2Events(
                intensityScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                sonogram.FBinWidth,
                eventThreshold,
                minDuration,
                maxDuration);

            // crop and score the events; the order of these calls is significant because they operate on the same list
            CropEvents(events, bandAverages, minDuration);
            CalculateAvIntensityScore(events, intensityScores);
            CalculateDeltaPeriodScore(events, periodicityScores, minFramePeriod, maxFramePeriod);
            CalculateBandWidthScore(events, sonogram.Data);
            CalculatePeaksScore(events, bandAverages);
            CalculateWeightedEventScore(events);

            // prepare the hits matrix: mark the band for every frame whose intensity exceeds the threshold
            var    hits        = new double[frameCount, binCount];
            double periodSpan  = maxFramePeriod - minFramePeriod;

            for (int frame = 0; frame < frameCount; frame++)
            {
                if (!(intensityScores[frame] > eventThreshold))
                {
                    continue;
                }

                for (int bin = minBin; bin < maxBin; bin++)
                {
                    hits[frame, bin] = (periodicityScores[frame] - minFramePeriod) / periodSpan; //normalisation
                }
            }

            // for display only
            periodicityScores = CropArrayToEvents(events, periodicityScores);

            var scores = new List <double[]>
            {
                DataTools.normalise(bandAverages),
                DataTools.normalise(intensityScores),
                DataTools.normalise(periodicityScores),
            };

            return(System.Tuple.Create(scores, hits, events));
        }
        } //Analysis()

        // Detects harmonic (evenly spaced, bar-like) spectral structure in a recording.
        // An amplitude spectrogram is computed, a band of 32 bins above minHz is scanned row by row for periodic
        // structure, and frames whose harmonic spacing (in Hz) lies strictly between minFormantgap and maxFormantgap
        // are scored. The smoothed scores are converted to acoustic events with no effective upper duration limit.
        // Returns a tuple of: a spectrogram built from the amplitude spectrogram, a hits matrix that is always null,
        // the smoothed score array, and the predicted events.
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent> > DetectHarmonics(
            AudioRecording recording,
            double intensityThreshold,
            int minHz,
            int minFormantgap,
            int maxFormantgap,
            double minDuration,
            int windowSize,
            double windowOverlap,
            TimeSpan segmentStartOffset)
        {
            //i: MAKE SONOGRAM
            int    numberOfBins    = 32;
            double binWidth        = recording.SampleRate / (double)windowSize;
            int    sr              = recording.SampleRate;
            double frameDuration   = windowSize / (double)sr;             // Duration of full frame or window in seconds
            double frameOffset     = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
            double framesPerSecond = 1 / frameOffset;

            double epsilon  = Math.Pow(0.5, recording.BitsPerSample - 1);
            var    results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
                recording.WavReader.Samples,
                sr,
                epsilon,
                windowSize,
                windowOverlap);

            double[] avAbsolute = results2.Average; //average absolute value over the minute recording

            //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            double[,] matrix      = results2.AmplitudeSpectrogram;
            double    windowPower = results2.WindowPower;

            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            int minBin = (int)Math.Round(minHz / binWidth);
            int maxHz  = (int)Math.Round(minHz + (numberOfBins * binWidth));

            int rowCount = matrix.GetLength(0);
            int maxbin   = minBin + numberOfBins;

            // minBin + 1 skips the DC column
            double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
            var results      = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity   = results.Item1; //an array of periodicity scores
            double[] periodicity = results.Item2;

            // Score only those frames whose harmonic spacing falls inside the permitted formant gap.
            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                double herzPeriod = periodicity[r] * binWidth;
                if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
                {
                    scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
                }
            }

            scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxDuration = 100000.0; //arbitrary long number - do not want to restrict duration of machine noise
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                framesPerSecond,
                binWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // No hits matrix is returned. Earlier code filled a full-size matrix and then discarded it;
            // that dead work has been removed and the returned value is still null.
            double[,] hits = null;

            //set up the sonogram to return. Use the existing amplitude sonogram
            int                bitsPerSample = recording.WavReader.BitsPerSample;
            TimeSpan           duration      = recording.Duration;
            NoiseReductionType nrt           = SNR.KeyToNoiseReductionType("STANDARD");

            var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
                recording.BaseName,
                windowSize,
                windowOverlap,
                bitsPerSample,
                windowPower,
                sr,
                duration,
                nrt,
                matrix);

            sonogram.DecibelsNormalised = new double[rowCount];

            //foreach frame or time step
            // NOTE(review): a frame with zero average amplitude gives Log10(0) = -Infinity here - confirm that
            // DataTools.normalise copes with it.
            for (int i = 0; i < rowCount; i++)
            {
                sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
            }

            sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
            return(Tuple.Create(sonogram, hits, scoreArray, predictedEvents));
        } //end Execute_HDDetect
Ejemplo n.º 5
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// Builds a noise-reduced spectrogram, measures how far the configured frequency band rises above
        /// the bands immediately above and below it, marks local peaks of that difference, and converts the
        /// density of peaks into a score array from which acoustic events are extracted.
        /// </summary>
        /// <param name="recording">The audio segment to analyse.</param>
        /// <param name="configuration">The analysis configuration; it is read into a <c>CriniaRemotaConfig</c>.</param>
        /// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
        /// <param name="getSpectralIndexes">Not used by this method.</param>
        /// <param name="outputDirectory">Only referenced by the disabled debug-image code.</param>
        /// <param name="imageWidth">Not used by this method.</param>
        /// <returns>The spectrogram, the peak-score plot and the events whose duration lies within the configured bounds.</returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            var recognizerConfig = new CriniaRemotaConfig();

            recognizerConfig.ReadConfigFile(configuration);

            // BETTER TO SET THESE. IGNORE USER!
            // this default framesize seems to work
            const int    frameSize     = 256;
            const double windowOverlap = 0.25;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),

                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.None
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr               = recording.SampleRate;
            double   freqBinWidth     = sr / (double)sonoConfig.WindowSize;
            int      minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int      maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var      decibelThreshold = 6.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            // Per-frame average of the signal band and of the side bands lying 3 to 9 bins outside it.
            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);
            double[] topBand        = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, maxBin + 3, rowCount - 1, maxBin + 9);

            // FIX: the lower side band used to be requested as (minBin - 3 .. minBin - 9), i.e. with the
            // column bounds reversed relative to the two calls above. Pass the low bin first and clamp at
            // bin zero so that a signal band close to 0 Hz cannot index before the first column.
            int      botBandMinBin = Math.Max(0, minBin - 9);
            int      botBandMaxBin = Math.Max(botBandMinBin, minBin - 3);
            double[] botBand       = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, botBandMinBin, rowCount - 1, botBandMaxBin);

            // Difference between the signal band and both side bands; values below 1.0 are zeroed.
            double[] diffArray = new double[amplitudeArray.Length];
            for (int i = 0; i < amplitudeArray.Length; i++)
            {
                diffArray[i] = amplitudeArray[i] - topBand[i] - botBand[i];
                if (diffArray[i] < 1.0)
                {
                    diffArray[i] = 0.0;
                }
            }

            // A peak is a frame at or above the decibel threshold that is strictly greater than both neighbours.
            // The first and last frames have only one neighbour and are never marked.
            bool[] peakArray = new bool[amplitudeArray.Length];
            for (int i = 1; i < diffArray.Length - 1; i++)
            {
                if (diffArray[i] < decibelThreshold)
                {
                    continue;
                }

                if (diffArray[i] > diffArray[i - 1] && diffArray[i] > diffArray[i + 1])
                {
                    peakArray[i] = true;
                }
            }

            // calculate score array based on density of peaks
            // NOTE(review): the frame duration is derived from the window length alone; windowOverlap is not taken into account here.
            double frameDuration = (double)frameSize / sr;

            // use a stimulus-decay function
            double durationOfDecayTail = 0.35; // seconds
            int    lengthOfDecayTail   = (int)Math.Round(durationOfDecayTail / frameDuration);
            double decayrate           = 0.95;

            //double decay = -0.05;
            //double fractionalDecay = Math.Exp(decay * lengthOfDecayTail);
            // the above setting gives decay of 0.22 over 0.35 seconds or 30 frames.

            double score = 0.0;
            int    locationOfLastPeak = 0;

            // Each peak adds 0.8 to the running score; every non-peak frame multiplies it by the decay rate.
            // The final frame is not visited by this loop, so its score stays at zero.
            double[] peakScores = new double[amplitudeArray.Length];
            for (int p = 0; p < peakScores.Length - 1; p++)
            {
                if (!peakArray[p])
                {
                    int distanceFromLastpeak = p - locationOfLastPeak;

                    // score decay
                    score *= decayrate;

                    // remove the decay tail
                    if (score < 0.5 && distanceFromLastpeak > lengthOfDecayTail && p >= lengthOfDecayTail)
                    {
                        score = 0.0;
                        for (int j = 0; j < lengthOfDecayTail; j++)
                        {
                            peakScores[p - j] = score;
                        }
                    }
                }
                else
                {
                    locationOfLastPeak = p;
                    score += 0.8;
                }

                peakScores[p] = score;
            }

            var events = AcousticEvent.ConvertScoreArray2Events(
                peakScores,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            double[,] hits = null;

            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in events)
            {
                // keep only events whose duration lies within the configured bounds
                if (ae.EventDurationSeconds < recognizerConfig.MinDuration || ae.EventDurationSeconds > recognizerConfig.MaxDuration)
                {
                    continue;
                }

                // add additional info
                ae.SpeciesName            = recognizerConfig.SpeciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // do a recognizer test.
            if (MainEntry.InDEBUG)
            {
                // var testDir = new DirectoryInfo(outputDirectory.Parent.Parent.FullName);
                // TestTools.RecognizerScoresTest(recording.BaseName, testDir, recognizerConfig.AnalysisName, peakScores);
                // AcousticEvent.TestToCompareEvents(recording.BaseName, testDir, recognizerConfig.AnalysisName, prunedEvents);
            }

            var plot = new Plot(this.DisplayName, peakScores, recognizerConfig.EventThreshold);

            // Debug output is deliberately disabled; change the condition to true to write a debug spectrogram.
            if (false)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;
                DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);
                DataTools.Normalise(diffArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var diffPlot = new Plot("Diff plot", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    plot, amplPlot, diffPlot
                };

                // NOTE: This DrawDebugImage() method can be over-written in this class.
                var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
                var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                debugImage.Save(debugPath);
            }

            return(new RecognizerResults
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = prunedEvents,

                //Events = events
            });
        } // Recognize()
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// Cross-correlates the per-frame average amplitude of a lower and an upper frequency band in windows
        /// of 64 frames, keeps the windows whose dominant period lies within the configured bounds, and turns
        /// the resulting intensity scores into acoustic events.
        /// </summary>
        /// <param name="recording">The audio to analyse. When null a message is logged and null is returned.</param>
        /// <param name="sonoConfig">Spectrogram configuration; its window size determines the frequency bin width.</param>
        /// <param name="lrConfig">Band limits, thresholds, and duration and period bounds.</param>
        /// <param name="returnDebugImage">True to also draw a debug image of the score tracks.</param>
        /// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
        /// <returns>
        /// The spectrogram, a hits matrix that is allocated but never written to, the smoothed intensity scores,
        /// the predicted events and the debug image (null unless requested).
        /// </returns>
        private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LewinsRailConfig lrConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return null;
            }

            int sampleRate = recording.SampleRate;

            // band limits in Hertz
            int upperMinHz = lrConfig.UpperBandMinHz;
            int upperMaxHz = lrConfig.UpperBandMaxHz;
            int lowerMinHz = lrConfig.LowerBandMinHz;
            int lowerMaxHz = lrConfig.LowerBandMaxHz;

            double eventThreshold = lrConfig.EventThreshold; // in 0-1
            double minDuration    = lrConfig.MinDuration;    // seconds
            double maxDuration    = lrConfig.MaxDuration;    // seconds
            double minPeriod      = lrConfig.MinPeriod;      // seconds
            double maxPeriod      = lrConfig.MaxPeriod;      // seconds

            // Width of one frequency bin in Hertz. The frame rate used below is set to the same number.
            double hzPerBin  = sampleRate / (double)sonoConfig.WindowSize;
            double frameRate = hzPerBin;

            // the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            // assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            // assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int HzToBin(int hz) => (int)Math.Round(hz / hzPerBin) + 1;

            int upperMinBin = HzToBin(upperMinHz);
            int upperMaxBin = HzToBin(upperMaxHz);
            int lowerMinBin = HzToBin(lowerMinHz);
            int lowerMaxBin = HzToBin(lowerMaxHz);

            // i: MAKE SONOGRAM
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);
            int colCount = sonogram.Data.GetLength(1);

            int lastRow = rowCount - 1;
            double[] lowerBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerMinBin, lastRow, lowerMaxBin);
            double[] upperBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperMinBin, lastRow, upperMaxBin);

            const int windowLength      = 64; // 64 frames = 3.7 seconds. Suitable for Lewins Rail.
            const int leadingBinsToZero = 3;  // in real data these bins are dominant and hide other frequency content
            int frameStep   = (int)Math.Round(frameRate); // take one second steps
            int windowCount = rowCount / frameStep;

            var intensity   = new double[rowCount];
            var periodicity = new double[rowCount];

            //######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int window = 0; window < windowCount; window++)
            {
                int offset = frameStep * window;
                double[] lowerWindow = DataTools.Subarray(lowerBand, offset, windowLength);
                double[] upperWindow = DataTools.Subarray(upperBand, offset, windowLength);

                // stop at the first window that is not the full length
                bool bothComplete = lowerWindow.Length == windowLength && upperWindow.Length == windowLength;
                if (!bothComplete)
                {
                    break;
                }

                var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerWindow, upperWindow);
                for (int bin = 0; bin < leadingBinsToZero; bin++)
                {
                    spectrum[bin] = 0.0;
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int peakIndex = DataTools.GetMaxIndex(spectrum);

                // convert the index of the spectral maximum to a period in seconds
                double period = 2 * windowLength / (double)peakIndex / frameRate;
                bool outsidePeriodBounds = period < minPeriod || period > maxPeriod;
                if (outsidePeriodBounds)
                {
                    continue;
                }

                // lay down score for the whole window: keep the larger intensity, overwrite the period
                double peakValue = spectrum[peakIndex];
                int    windowEnd = offset + windowLength;
                for (int frame = offset; frame < windowEnd; frame++)
                {
                    if (intensity[frame] < peakValue)
                    {
                        intensity[frame] = peakValue;
                    }

                    periodicity[frame] = period;
                }
            }

            //######################################################################
            // iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);

            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerMinHz,
                upperMaxHz,
                sonogram.FramesPerSecond,
                hzPerBin,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperBand, segmentStartOffset);

            // allocated for the return value and the debug image; nothing is written into it here
            var hits = new double[rowCount, colCount];

            //######################################################################
            var   scorePlot  = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out var scaledScores, out var scaledThreshold);
                var intensityPlot = new Plot("Intensity", scaledScores, scaledThreshold);

                DataTools.Normalise(periodicity, 10, out scaledScores, out scaledThreshold);
                var periodicityPlot = new Plot("Periodicity", scaledScores, scaledThreshold);

                var debugPlots = new List <Plot> { scorePlot, intensityPlot, periodicityPlot };
                debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
            }

            return Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage);
        } //Analysis()
Ejemplo n.º 7
0
        //#IntensityThreshold: 0.15
        //# Event threshold - Determines FP / FN trade-off for events.
        //EventThreshold: 0.2

        /// <summary>
        /// Finds acoustic events that contain harmonics within the frequency band minHz to maxHz.
        /// The band is cut out of the spectrogram, a per-frame harmonic intensity score is obtained from
        /// <c>CrossCorrelation.DetectHarmonicsInSonogramMatrix</c>, frames whose formant gap is outside the
        /// permitted range are zeroed, the scores are smoothed, and events are extracted from the result.
        /// </summary>
        /// <param name="sonogram">The spectrogram to search.</param>
        /// <param name="minHz">Lower bound of the search band in Hertz.</param>
        /// <param name="maxHz">Upper bound of the search band in Hertz.</param>
        /// <param name="nyquist">Nyquist frequency; divided by the number of bins to obtain the bin width used for the Hertz-to-bin conversion.</param>
        /// <param name="decibelThreshold">Threshold passed to the harmonic detector.</param>
        /// <param name="dctThreshold">Minimum harmonic intensity score; used both to gate the formant-gap check and as the event threshold.</param>
        /// <param name="minDuration">Minimum event duration, passed to the event extractor.</param>
        /// <param name="maxDuration">Maximum event duration, passed to the event extractor.</param>
        /// <param name="minFormantGap">Smallest acceptable formant gap in Hertz.</param>
        /// <param name="maxFormantGap">Largest acceptable formant gap in Hertz.</param>
        /// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
        /// <returns>The extracted events and the smoothed harmonic intensity scores.</returns>
        public static (List <AcousticEvent>, double[]) GetComponentsWithHarmonics(
            SpectrogramStandard sonogram,
            int minHz,
            int maxHz,
            int nyquist,
            double decibelThreshold,
            double dctThreshold,
            double minDuration,
            double maxDuration,
            int minFormantGap,
            int maxFormantGap,
            TimeSpan segmentStartOffset)
        {
            var sonogramData = sonogram.Data;
            int frameCount   = sonogramData.GetLength(0);
            int binCount     = sonogramData.GetLength(1);

            double freqBinWidth  = nyquist / (double)binCount;
            int    minBin        = (int)Math.Round(minHz / freqBinWidth);
            int    maxBin        = (int)Math.Round(maxHz / freqBinWidth);
            int    bandWidthBins = maxBin - minBin + 1;

            // extract the sub-band
            double[,] subMatrix = MatrixTools.Submatrix(sonogramData, 0, minBin, frameCount - 1, maxBin);

            //ii: DETECT HARMONICS
            // now look for harmonics in search band using the Xcorrelation-DCT method.
            var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold);

            // set up score arrays (the first tuple item is not needed here)
            double[] harmonicIntensityScores = results.Item2; // an array of formant intensity
            int[]    maxIndexArray           = results.Item3;

            for (int r = 0; r < frameCount; r++)
            {
                // frames already below the threshold are left as they are
                if (harmonicIntensityScores[r] < dctThreshold)
                {
                    continue;
                }

                // ignore locations with incorrect formant gap.
                // A max index of zero yields an infinite gap, which fails the upper-bound test and is zeroed as well.
                int    maxId      = maxIndexArray[r];
                double freqBinGap = 2 * bandWidthBins / (double)maxId;
                double formantGap = freqBinGap * freqBinWidth;
                if (formantGap < minFormantGap || formantGap > maxFormantGap)
                {
                    harmonicIntensityScores[r] = 0.0;
                }
            }

            // smooth the harmonicIntensityScores array to allow for brief gaps.
            harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 5);

            // extract the events based on length and threshold.
            // Note: This method does NOT do prior smoothing of the score array.
            // FIX: the score array holds harmonic intensities, which are gated against dctThreshold in the loop above,
            // so the same threshold is used for event extraction. Previously decibelThreshold was passed here.
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                harmonicIntensityScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                sonogram.FBinWidth,
                dctThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            return (acousticEvents, harmonicIntensityScores);
        }
        } // FELTWithBinaryTemplate()

        /// <summary>
        /// Scans a spectrogram for events that match a syntactic template, given a dictionary of parameters.
        /// Template has a different orientation to others.
        /// </summary>
        /// <param name="sonogram">The spectrogram to scan; it is also returned as the first item of the result.</param>
        /// <param name="dict">Template parameters, looked up with the <c>FeltTemplate_Create.key_*</c> keys.</param>
        /// <param name="templateMatrix">The template; its first dimension is counted in spectrogram frames.</param>
        /// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
        /// <returns>The spectrogram, the matched events and the score array.</returns>
        public static Tuple <SpectrogramStandard, List <AcousticEvent>, double[]> FELTWithSprTemplate(SpectrogramStandard sonogram, Dictionary <string, string> dict, char[,] templateMatrix, TimeSpan segmentStartOffset)
        {
            // i: get parameters from dictionary
            string callName          = dict[FeltTemplate_Create.key_CALL_NAME];
            bool   segmentFirst      = bool.Parse(dict[FeltTemplate_Create.key_DO_SEGMENTATION]);
            double smoothingWindow   = double.Parse(dict[FeltTemplate_Create.key_SMOOTH_WINDOW]);     // applied before segmentation
            int    minHz             = int.Parse(dict[FeltTemplate_Create.key_MIN_HZ]);
            int    maxHz             = int.Parse(dict[FeltTemplate_Create.key_MAX_HZ]);
            double minDuration       = double.Parse(dict[FeltTemplate_Create.key_MIN_DURATION]);      // min duration of event in seconds
            double decibelThreshold  = double.Parse(dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD]); // dB threshold

            // The configured threshold is still parsed (so a missing or malformed entry fails as before)
            // but it is then replaced by a fixed value.
            decibelThreshold = 4.0;

            int topBin     = (int)(maxHz / sonogram.FBinWidth);
            int bottomBin  = (int)(minHz / sonogram.FBinWidth);
            int bandBins   = topBin - bottomBin + 1;

            Log.WriteLine("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, bandBins);

            // ii: TEMPLATE INFO
            int    templateFrames  = templateMatrix.GetLength(0);
            double templateSeconds = templateFrames / sonogram.FramesPerSecond;

            Log.WriteIfVerbose("Template duration = {0:f3} seconds or {1} frames.", templateSeconds, templateMatrix.GetLength(0));
            Log.WriteIfVerbose("Min Duration: " + minDuration + " seconds");

            // iii: DO SEGMENTATION
            double segmentationThreshold = 2.0;             // standard deviations above background noise
            double maxDuration           = double.MaxValue; // do not constrain maximum length of events
            var segmentation = AcousticEvent.GetSegmentationEvents(
                sonogram,
                segmentFirst,
                segmentStartOffset,
                minHz,
                maxHz,
                smoothingWindow,
                segmentationThreshold,
                minDuration,
                maxDuration);
            var segmentEvents = segmentation.Item1;

            // iv: Score sonogram for events matching template
            var matchResult = FindMatchingEvents.Execute_Spr_Match(templateMatrix, sonogram, segmentEvents, minHz, maxHz, decibelThreshold);
            var scores      = matchResult.Item1;

            // v: PROCESS SCORE ARRAY
            // the extremes are reported before negative scores are clipped
            LoggedConsole.WriteLine("Scores: min={0:f4}, max={1:f4}, threshold={2:f2}dB", scores.Min(), scores.Max(), decibelThreshold);

            // clip negative scores to zero
            for (int frame = 0; frame < scores.Length; frame++)
            {
                bool isNegative = scores[frame] < 0.0;
                if (isNegative)
                {
                    scores[frame] = 0.0;
                }
            }

            // vi: EXTRACT EVENTS
            List <AcousticEvent> matchEvents = AcousticEvent.ConvertScoreArray2Events(
                scores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                sonogram.FBinWidth,
                decibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // label every event with the source file and call name held in the spectrogram configuration
            matchEvents.ForEach(ev =>
            {
                ev.FileName = sonogram.Configuration.SourceFName;
                ev.Name     = sonogram.Configuration.CallName;
            });

            // Edit the events to correct the start time, duration and end of events to match the max score and length of the template.
            AdjustEventLocation(matchEvents, callName, templateSeconds, sonogram.Duration.TotalSeconds);

            return Tuple.Create(sonogram, matchEvents, scores);
        } // FELTWithSprTemplate()
        /// <summary>
        /// Finds whistle-like acoustic events, one frequency bin at a time, inside the band minHz to maxHz.
        /// For each bin the mean intensity of a three-bin band centred on it is compared with the side bands
        /// three to five bins away; the smoothed difference is thresholded to produce events, and the events
        /// from all bins are finally merged.
        /// This method averages dB log values incorrectly but it is faster than doing many log conversions.
        /// It is used to find acoustic events and is accurate enough for the purpose.
        /// </summary>
        /// <param name="sonogram">The spectrogram to search.</param>
        /// <param name="minHz">Lower bound of the search band in Hertz.</param>
        /// <param name="maxHz">Upper bound of the search band in Hertz.</param>
        /// <param name="nyquist">Nyquist frequency; divided by the number of bins to obtain the bin width used for the Hertz-to-bin conversion.</param>
        /// <param name="decibelThreshold">Threshold applied to the smoothed intensity difference.</param>
        /// <param name="minDuration">Minimum event duration, passed to the event extractor.</param>
        /// <param name="maxDuration">Maximum event duration, passed to the event extractor.</param>
        /// <param name="segmentStartOffset">Offset of this segment from the start of the source recording.</param>
        /// <returns>The combined events and, per frame, the maximum intensity difference over all scanned bins.</returns>
        public static (List <AcousticEvent>, double[]) GetWhistles(
            SpectrogramStandard sonogram,
            int minHz,
            int maxHz,
            int nyquist,
            double decibelThreshold,
            double minDuration,
            double maxDuration,
            TimeSpan segmentStartOffset)
        {
            var sonogramData = sonogram.Data;
            int frameCount   = sonogramData.GetLength(0);
            int binCount     = sonogramData.GetLength(1);

            double binWidth = nyquist / (double)binCount;
            int    minBin   = (int)Math.Round(minHz / binWidth);
            int    maxBin   = (int)Math.Round(maxHz / binWidth);

            // list of accumulated acoustic events
            var events = new List <AcousticEvent>();
            var combinedIntensityArray = new double[frameCount];

            // FIX: the upper side band reads bins up to (bin + 5). Stop the scan early enough that this index
            // stays inside the spectrogram; previously a band reaching the top bins threw IndexOutOfRangeException.
            int binLimit = Math.Min(maxBin, binCount - 5);

            // The lower side band reads bins down to (bin - 5). When the search band starts below bin 4 there is
            // no room for it, so only the upper side band is used (for every bin of the scan, as before).
            bool useLowerSideBand = minBin >= 4;

            // for all frequency bins except top and bottom
            for (int bin = minBin + 1; bin < binLimit; bin++)
            {
                // set up an intensity array for the frequency bin.
                double[] intensity = new double[frameCount];

                // buffer zone around whistle is four bins wide.
                if (!useLowerSideBand)
                {
                    // for all time frames in this frequency bin
                    for (int t = 0; t < frameCount; t++)
                    {
                        var bandIntensity        = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                        var topSideBandIntensity = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 3.0;
                        intensity[t] = bandIntensity - topSideBandIntensity;
                        intensity[t] = Math.Max(0.0, intensity[t]);
                    }
                }
                else
                {
                    // for all time frames in this frequency bin
                    for (int t = 0; t < frameCount; t++)
                    {
                        // each side band sum is divided by 6 so that the two together form the mean of the six side bins
                        var bandIntensity           = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                        var topSideBandIntensity    = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 6.0;
                        var bottomSideBandIntensity = (sonogramData[t, bin - 3] + sonogramData[t, bin - 4] + sonogramData[t, bin - 5]) / 6.0;
                        intensity[t] = bandIntensity - topSideBandIntensity - bottomSideBandIntensity;
                        intensity[t] = Math.Max(0.0, intensity[t]);
                    }
                }

                // smooth the decibel array to allow for brief gaps.
                intensity = DataTools.filterMovingAverageOdd(intensity, 7);

                // calculate the Hertz bounds of the acoustic events for these freq bins
                int bottomHzBound = (int)Math.Floor(sonogram.FBinWidth * (bin - 1));
                int topHzBound    = (int)Math.Ceiling(sonogram.FBinWidth * (bin + 2));

                // extract the events based on length and threshold.
                // Note: This method does NOT do prior smoothing of the dB array.
                var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                    intensity,
                    bottomHzBound,
                    topHzBound,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    decibelThreshold,
                    minDuration,
                    maxDuration,
                    segmentStartOffset);

                // add to combined intensity array: keep the per-frame maximum over all bins
                for (int t = 0; t < frameCount; t++)
                {
                    //combinedIntensityArray[t] += intensity[t];
                    combinedIntensityArray[t] = Math.Max(intensity[t], combinedIntensityArray[t]);
                }

                // combine events
                events.AddRange(acousticEvents);
            } //end for all freq bins

            // combine adjacent acoustic events
            events = AcousticEvent.CombineOverlappingEvents(events, segmentStartOffset);

            return (events, combinedIntensityArray);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            string       speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string       abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
            const int    frameSize     = 256;
            const double windowOverlap = 0.0;

            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // duration of DCT in seconds
            //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName             = recording.BaseName,
                WindowSize              = frameSize,
                WindowOverlap           = windowOverlap,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    recordingDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth) + 1;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double framesPerSecond = freqBinWidth;
            double minPeriod       = 1 / (double)maxOscilFreq;
            double maxPeriod       = 1 / (double)minOscilFreq;
            double dctDuration     = 5 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // remove baseline from amplitude array
            var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

            // remove hi freq content from amplitude array
            var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

            var       dctScores = new double[highPassFilteredSignal.Length];
            const int step      = 2;

            for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
            {
                if (highPassFilteredSignal[i] < decibelThreshold)
                {
                    continue;
                }

                double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

                // Look for oscillations in the highPassFilteredSignal
                Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
                bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                if (!periodWithinBounds)
                {
                    continue;
                }

                if (intensity < dctThreshold)
                {
                    continue;
                }

                //lay down score for sample length
                for (int j = 0; j < dctLength; j++)
                {
                    if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
                    {
                        dctScores[i + j] = intensity;
                    }
                }
            }

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                dctScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // ######################################################################
            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
            });

            var plot  = new Plot(this.DisplayName, dctScores, eventThreshold);
            var plots = new List <Plot> {
                plot
            };

            // DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
                DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

                DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    ampltdPlot, lowPassPlot, demeanedPlot, plot
                };
                Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null);
                var   debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = null,
                Plots = plots,
                Events = acousticEvents,
            });
        }
        } //Analyze()

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// Builds a spectrogram of the audio segment, cross-correlates the frame-averaged values of a
        /// lower and an upper frequency band in 64-frame windows stepped about one second apart, and
        /// converts the resulting intensity scores to acoustic events.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">Audio file holding the segment to analyse.</param>
        /// <param name="configDict">Configuration table supplying the band limits, thresholds, and the duration and period bounds.</param>
        /// <param name="segmentStartOffset">Start offset of the segment; passed through to the score-to-event conversion.</param>
        /// <returns>
        /// A tuple of: the spectrogram, a hits matrix with the spectrogram's dimensions (left all zero by this method),
        /// the smoothed per-frame intensity scores, the predicted events, and the duration of the recording.
        /// </returns>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            // fixed analysis settings - any frame size or overlap set by the user is ignored
            const int    frameSize     = 1024;
            const double windowOverlap = 0.0;

            int upperBandMinHz = int.Parse(configDict[KeyUpperfreqbandBtm]);
            int upperBandMaxHz = int.Parse(configDict[KeyUpperfreqbandTop]);
            int lowerBandMinHz = int.Parse(configDict[KeyLowerfreqbandBtm]);
            int lowerBandMaxHz = int.Parse(configDict[KeyLowerfreqbandTop]);

            // Parsed so that a missing or malformed entry still fails here; the value is not used further in this method.
            double decibelThreshold   = double.Parse(configDict[KeyDecibelThreshold]);   // dB
            double intensityThreshold = double.Parse(configDict[KeyIntensityThreshold]); // in 0-1
            double minDuration        = double.Parse(configDict[KeyMinDuration]);        // seconds
            double maxDuration        = double.Parse(configDict[KeyMaxDuration]);        // seconds
            double minPeriod          = double.Parse(configDict[KeyMinPeriod]);          // seconds
            double maxPeriod          = double.Parse(configDict[KeyMaxPeriod]);          // seconds

            //i: MAKE SONOGRAM
            BaseSonogram sonogram;
            TimeSpan     recordingDuration;
            double       freqBinWidth;

            // The recording is only needed to build the spectrogram, so it is released as soon as that is done.
            using (var recording = new AudioRecording(fiSegmentOfSourceFile.FullName))
            {
                var sonoConfig = new SonogramConfig //default values config
                {
                    SourceFName        = recording.BaseName,
                    WindowSize         = frameSize,
                    WindowOverlap      = windowOverlap,
                    NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
                };

                recordingDuration = recording.Duration;
                freqBinWidth      = recording.SampleRate / (double)sonoConfig.WindowSize;
                sonogram          = new SpectrogramStandard(sonoConfig, recording.WavReader);
            }

            // With zero frame overlap the frame rate has the same value as the bin width (see table below).
            double framesPerSecond = freqBinWidth;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            int rowCount = sonogram.Data.GetLength(0);
            int colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            // one value per frame: the average over the bins of each band
            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            int step      = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount = rowCount / step;

            const int sampleLength = 64; //64 frames = 3.7 seconds. Suitable for Lewins Rail.
            const int zeroCount    = 3;  //number of leading spectrum bins to suppress

            double[] intensity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);

                // stop once a full-length window can no longer be extracted
                if ((lowerSubarray.Length != sampleLength) || (upperSubarray.Length != sampleLength))
                {
                    break;
                }

                var spectrum = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0; //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds

                // maxId == 0 yields an infinite period, which is rejected here as well
                if ((period < minPeriod) || (period > maxPeriod))
                {
                    continue;
                }

                //lay down score for sample length, keeping the highest score seen for each frame
                double score = spectrum[maxId];
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < score)
                    {
                        intensity[start + j] = score;
                    }
                }
            }
            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray);

            // no hits are marked by this method; the matrix is returned empty
            var hits = new double[rowCount, colCount];

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, recordingDuration));
        } //Analysis()
Ejemplo n.º 12
0
        /// <summary>
        /// Entry point for the Syntactic Pattern Recognition (SPR) analysis. Reads the parameters from the
        /// config file, builds a spectrogram of the source recording, runs the detector selected by the
        /// configured call name (WHIPBIRD, CURLEW or CURRAWONG), writes the event summary to
        /// "SPR-output.txt" in the output directory and, depending on the DRAW_SONOGRAMS setting,
        /// draws the annotated spectrogram. Waits for a line of console input before returning.
        /// </summary>
        /// <param name="arguments">Source recording, config file and output directory. When null, <c>Dev()</c> supplies them.</param>
        /// <exception cref="InvalidOperationException">The configured call name is not one of the supported calls.</exception>
        public static void Execute(Arguments arguments)
        {
            if (arguments == null)
            {
                arguments = Dev();
            }

            LoggedConsole.WriteLine("DATE AND TIME:" + DateTime.Now);
            LoggedConsole.WriteLine("Syntactic Pattern Recognition\n");

            Log.Verbosity = 1;

            FileInfo      recordingPath = arguments.Source;
            FileInfo      iniPath       = arguments.Config;
            DirectoryInfo outputDir     = arguments.Output;
            string        opFName       = "SPR-output.txt";

            // Path.Combine inserts the directory separator. Plain concatenation glued the file name onto
            // the directory name whenever the directory was supplied without a trailing separator.
            string opPath = Path.Combine(outputDir.FullName, opFName);

            Log.WriteIfVerbose("# Output folder =" + outputDir);

            // A: READ PARAMETER VALUES FROM INI FILE
            var config = new ConfigDictionary(iniPath);
            Dictionary <string, string> dict = config.GetTable();

            string callName     = dict[key_CALL_NAME];
            double frameOverlap = Convert.ToDouble(dict[key_FRAME_OVERLAP]);

            //SPT PARAMETERS
            double intensityThreshold   = Convert.ToDouble(dict[key_SPT_INTENSITY_THRESHOLD]);
            int    smallLengthThreshold = Convert.ToInt32(dict[key_SPT_SMALL_LENGTH_THRESHOLD]);

            //WHIPBIRD PARAMETERS
            int    whistle_MinHz          = int.Parse(dict[key_WHISTLE_MIN_HZ]);
            int    whistle_MaxHz          = int.Parse(dict[key_WHISTLE_MAX_HZ]);
            double optimumWhistleDuration = double.Parse(dict[key_WHISTLE_DURATION]);   //optimum duration of whistle in seconds
            int    whip_MinHz             = (dict.ContainsKey(key_WHIP_MIN_HZ)) ? int.Parse(dict[key_WHIP_MIN_HZ]) : 0;
            int    whip_MaxHz             = (dict.ContainsKey(key_WHIP_MAX_HZ)) ? int.Parse(dict[key_WHIP_MAX_HZ]) : 0;
            double whipDuration           = (dict.ContainsKey(key_WHIP_DURATION)) ? double.Parse(dict[key_WHIP_DURATION]) : 0.0; //duration of whip in seconds

            //CURLEW PARAMETERS
            double minDuration = (dict.ContainsKey(key_MIN_DURATION)) ? double.Parse(dict[key_MIN_DURATION]) : 0.0;              //min duration of call in seconds
            double maxDuration = (dict.ContainsKey(key_MAX_DURATION)) ? double.Parse(dict[key_MAX_DURATION]) : 0.0;              //duration of call in seconds

            double eventThreshold = double.Parse(dict[key_EVENT_THRESHOLD]);                                                     //min score for an acceptable event
            int    drawSonograms  = Convert.ToInt16(dict[key_DRAW_SONOGRAMS]);

            // B: the source file is used directly; no audio conversion is performed here
            var destinationAudioFile = recordingPath;

            //LOAD RECORDING AND MAKE SONOGRAM
            BaseSonogram sonogram;

            using (var recording = new AudioRecording(destinationAudioFile.FullName))
            {
                var sonoConfig = new SonogramConfig
                {
                    NoiseReductionType = NoiseReductionType.None,
                    WindowOverlap      = frameOverlap,
                };
                sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            }

            List <AcousticEvent> predictedEvents;

            double[,] hits;
            double[] scores;

            var audioFileName = Path.GetFileNameWithoutExtension(destinationAudioFile.FullName);

            if (callName.Equals("WHIPBIRD"))
            {
                //SPT
                var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);

                //SPR
                Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
                int    slope       = 0;   //degrees of the circle. i.e. 90 = vertical line.
                double sensitivity = 0.7; //lower value = more sensitive
                var    mHori       = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
                slope       = 87;         //84
                sensitivity = 0.8;        //lower value = more sensitive
                var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 4, intensityThreshold + 1, sensitivity);
                Log.WriteLine("SPR finished");
                Log.WriteLine("Extract Whipbird calls - start");

                int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
                int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
                int whistleFrames    = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
                int minBound_Whip    = (int)(whip_MinHz / sonogram.FBinWidth);
                int maxBound_Whip    = (int)(whip_MaxHz / sonogram.FBinWidth);
                int whipFrames       = (int)(sonogram.FramesPerSecond * whipDuration); //86 = frames/sec.
                var result3          = DetectWhipBird(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, minBound_Whip, maxBound_Whip, whipFrames, smallLengthThreshold);
                scores = result3.Item1;
                hits   = DataTools.AddMatrices(mHori, mVert);

                predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                    scores,
                    whip_MinHz,
                    whip_MaxHz,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    eventThreshold,
                    minDuration,
                    maxDuration,
                    TimeSpan.Zero);
                foreach (AcousticEvent ev in predictedEvents)
                {
                    ev.FileName = audioFileName;
                    ev.Name     = callName;
                }

                // the image is drawn from the matrix returned by SPT rather than the raw spectrogram
                sonogram.Data = result1.Item1;
                Log.WriteLine("Extract Whipbird calls - finished");
            }
            else if (callName.Equals("CURLEW"))
            {
                //SPT
                double backgroundThreshold = 4.0;
                var    result1             = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.Standard, backgroundThreshold);

                //SPR
                Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
                int    slope       = 20;  //degrees of the circle. i.e. 90 = vertical line.
                double sensitivity = 0.8; //lower value = more sensitive
                var    mHori       = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
                slope       = 160;
                sensitivity = 0.8;        //lower value = more sensitive
                var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
                Log.WriteLine("SPR finished");

                //detect curlew calls
                int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
                int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
                int whistleFrames    = (int)(sonogram.FramesPerSecond * optimumWhistleDuration);
                var result3          = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, smallLengthThreshold);

                //process curlew scores - look for curlew characteristic periodicity
                double minPeriod        = 1.2;
                double maxPeriod        = 1.8;
                int    minPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * minPeriod);
                int    maxPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * maxPeriod);
                scores = DataTools.filterMovingAverage(result3.Item1, 21);
                scores = DataTools.PeriodicityDetection(scores, minPeriod_frames, maxPeriod_frames);

                //extract events
                predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                    scores,
                    whistle_MinHz,
                    whistle_MaxHz,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    eventThreshold,
                    minDuration,
                    maxDuration,
                    TimeSpan.Zero);
                foreach (AcousticEvent ev in predictedEvents)
                {
                    ev.FileName = audioFileName;
                    ev.Name     = callName;
                }

                hits          = DataTools.AddMatrices(mHori, mVert);
                sonogram.Data = result1.Item1;
                Log.WriteLine("Extract Curlew calls - finished");
            }
            else if (callName.Equals("CURRAWONG"))
            {
                //SPT
                var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);

                //SPR
                Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
                int    slope       = 70;  //degrees of the circle. i.e. 90 = vertical line. (alternative tried: 210)
                double sensitivity = 0.7; //lower value = more sensitive
                var    mHori       = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
                slope       = 110;        //(alternative tried: 340)
                sensitivity = 0.7;        //lower value = more sensitive
                var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
                Log.WriteLine("SPR finished");

                int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
                int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
                int whistleFrames    = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
                var result3          = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames + 10, smallLengthThreshold);
                scores = result3.Item1;
                hits   = DataTools.AddMatrices(mHori, mVert);

                predictedEvents = AcousticEvent.ConvertIntensityArray2Events(
                    scores,
                    TimeSpan.Zero,
                    whistle_MinHz,
                    whistle_MaxHz,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    eventThreshold,
                    0.5,
                    maxDuration);

                // event names are deliberately left as produced by the conversion for this call type
                foreach (AcousticEvent ev in predictedEvents)
                {
                    ev.FileName = audioFileName;
                }
            }
            else
            {
                // Previously an unrecognised name fell through with no events and failed later with a NullReferenceException.
                throw new InvalidOperationException("Unsupported call name in config file: \"" + callName + "\". Expected WHIPBIRD, CURLEW or CURRAWONG.");
            }

            //write event count to results file.
            double sigDuration = sonogram.Duration.TotalSeconds;
            int    count       = predictedEvents.Count;

            Log.WriteIfVerbose("Number of Events: " + count);
            string str = string.Format("{0}\t{1}\t{2}", callName, sigDuration, count);

            FileTools.WriteTextFile(opPath, AcousticEvent.WriteEvents(predictedEvents, str).ToString());

            // SAVE IMAGE
            string imageName = Path.Combine(outputDir.FullName, audioFileName);
            string imagePath = imageName + ".png";

            // Keep an image from an earlier run by moving it to the first free "<name>.<n>.png".
            if (File.Exists(imagePath))
            {
                int suffix = 1;
                while (File.Exists(imageName + "." + suffix.ToString() + ".png"))
                {
                    suffix++;
                }

                File.Move(imagePath, imageName + "." + suffix.ToString() + ".png");
            }

            // 2 = always draw; 1 = draw only when at least one event was found; anything else = never draw
            bool drawImage = (drawSonograms == 2) || ((drawSonograms == 1) && (predictedEvents.Count > 0));
            if (drawImage)
            {
                DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
                Log.WriteIfVerbose("Image saved to: " + imagePath);
            }

            LoggedConsole.WriteLine("\nFINISHED RECORDING!");
            Console.ReadLine();
        }
Ejemplo n.º 13
0
        /// <summary>
        /// THIS IS THE CORE DETECTION METHOD
        /// Detects the human voice.
        /// Builds a spectrogram, searches a 64-bin band starting at MIN_HZ for regularly spaced bars in
        /// each frame, and scores a frame only when its intensity exceeds the threshold, its formant gap
        /// lies within the configured bounds, and the mean position of its two largest spectral peaks
        /// (searched from about 100 Hz up to 3000 Hz) falls between 500 and 1000 Hz.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">Audio file holding the segment to analyse.</param>
        /// <param name="configDict">Configuration table supplying MIN_HZ, the formant-gap bounds, the intensity threshold, the duration bounds and, optionally, the frame length.</param>
        /// <param name="segmentStartOffset">Start offset of the segment; passed through to the score-to-event conversion.</param>
        /// <returns>
        /// A tuple of: the spectrogram, a hits matrix marking the scanned band of scored frames, a plot of the
        /// unfiltered intensity array, the predicted events, and the duration of the recording.
        /// </returns>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values
            int frameLength = 1024;

            if (configDict.TryGetValue(AnalysisKeys.FrameLength, out var frameLengthText))
            {
                frameLength = int.Parse(frameLengthText);
            }

            const double windowOverlap = 0.0;

            int    minHz              = int.Parse(configDict["MIN_HZ"]);
            int    minFormantgap      = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int    maxFormantgap      = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double minDuration        = double.Parse(configDict["MIN_DURATION"]);        // seconds
            double maxDuration        = double.Parse(configDict["MAX_DURATION"]);        // seconds

            //i: MAKE SONOGRAM
            BaseSonogram sonogram;
            TimeSpan     recordingDuration;
            double       freqBinWidth;

            // The recording is only needed to build the spectrogram, so it is released as soon as that is done.
            using (var recording = new AudioRecording(fiSegmentOfSourceFile.FullName))
            {
                var sonoConfig = new SonogramConfig
                {
                    //default values config
                    SourceFName        = recording.BaseName,
                    WindowSize         = frameLength,
                    WindowOverlap      = windowOverlap,
                    NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
                };

                recordingDuration = recording.Duration;
                freqBinWidth      = recording.SampleRate / (double)sonoConfig.WindowSize;
                sonogram          = new SpectrogramStandard(sonoConfig, recording.WavReader);
            }

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            const int numberOfBins = 64;

            int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin = minBin + numberOfBins - 1;
            int maxHz  = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            int rowCount = sonogram.Data.GetLength(0);
            int colCount = sonogram.Data.GetLength(1);

            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
            var results      = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity   = results.Item1;
            double[] periodicity = results.Item2; //an array of periodicity scores

            //expect humans to have max power >100 and < 1000 Hz. Set these bounds
            int lowerHumanMaxBound = (int)(100 / freqBinWidth);  //ignore 0-100 hz - too much noise
            int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 3000 hz

            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                // Only frames strictly above the threshold are ever scored, so reject the rest (including NaN) up front.
                if (!(intensity[r] > intensityThreshold))
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find freq having max power and use info to adjust score.
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < lowerHumanMaxBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
                {
                    spectrum[j] = 0.0;
                }

                // locate the largest peak, blank it, then locate the second largest
                double[] peakvalues = DataTools.GetPeakValues(spectrum);
                int      maxIndex1  = DataTools.GetMaxIndex(peakvalues);
                peakvalues[maxIndex1] = 0.0;
                int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
                int avMaxBin  = (maxIndex1 + maxIndex2) / 2;

                // the score survives only when the averaged peak frequency lies within 500-1000 Hz
                int    freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);
                bool   outsideVoiceBand = freqWithMaxPower > 1000 || freqWithMaxPower < 500;
                double discount         = outsideVoiceBand ? 0.0 : 1.0;

                scoreArray[r] = intensity[r] * discount;
            }

            //transfer info to a hits matrix.
            var    hits      = new double[rowCount, colCount];
            double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                double herzPeriod     = periodicity[r] * freqBinWidth;
                double relativePeriod = (herzPeriod - minFormantgap) / maxFormantgap;
                for (int c = minBin; c < maxbin; c++)
                {
                    hits[r, c] = relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            //remove isolated speech events - expect humans to talk like politicians
            //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);

            // note: the plot shows the raw intensity array, not the discounted scoreArray
            Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, recordingDuration));
        } //Analysis()
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD for TRILLS (and TINKS).
        ///
        /// Builds a spectrogram of the recording, averages a lower and an upper frequency band per frame,
        /// combines them into a "sum minus difference" score and converts that score into candidate events.
        /// A candidate trill event is confirmed only if an oscillation with period between MinPeriod and
        /// MaxPeriod is found whose intensity reaches the intensity threshold.
        /// Candidate tink events (found with a threshold 3 dB higher) are all accepted without further pruning.
        ///
        /// See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
        /// </summary>
        /// <param name="recording">The audio segment to analyse. If null, a message is logged and null is returned.</param>
        /// <param name="sonoConfig">Configuration of the analysis spectrogram. Its WindowSize also fixes the frequency bin width.</param>
        /// <param name="lwConfig">Recognizer parameters: thresholds, band limits, durations, periods, species and profile names.</param>
        /// <param name="returnDebugImage">When true, a debug image containing the score plots is drawn and returned.</param>
        /// <param name="segmentStartOffset">Passed unchanged to the score-to-event conversion.</param>
        /// <returns>
        /// A tuple of: a display spectrogram (window 512, no overlap), the hits matrix (always null in this method),
        /// the per-frame oscillation score array, the confirmed events, and the debug image (null unless requested).
        /// </returns>
        private static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaWatjulumConfig lwConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            double intensityThreshold = lwConfig.IntensityThreshold;
            double minDuration = lwConfig.MinDurationOfTrill; // seconds
            double maxDuration = lwConfig.MaxDurationOfTrill; // seconds
            double minPeriod = lwConfig.MinPeriod; // seconds
            double maxPeriod = lwConfig.MaxPeriod; // seconds

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return null;
            }

            // i: MAKE SONOGRAM
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            // NOTE(review): frame rate equals bin width only when frames do not overlap - confirm callers use WindowOverlap = 0.
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 4 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            // convert the band limits from Hertz to spectrogram bin indices
            int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);

            // one average value per frame for each of the two bands
            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

            // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
            var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                lwConfig.DecibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // suppress small baseline-removed values before searching for oscillations
            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // LOOK FOR TRILL EVENTS
            // init the score array
            double[] scores = new double[rowCount];

            // no hits matrix is produced by this recognizer
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List<AcousticEvent>();

            // A DCT window is usable only if it fits inside both the array it reads and the array it writes.
            int scannableLength = Math.Min(differenceScores.Length, scores.Length);
            const int step = 2;

            foreach (var ae in predictedTrillEvents)
            {
                int eventStart = ae.Oblong.RowTop;
                int eventWidth = ae.Oblong.RowWidth;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity.
                // Windows are positioned so that their centres sweep across the event.
                int scanStart = eventStart - (dctLength / 2);
                int scanEnd = eventStart + eventWidth - (dctLength / 2);
                for (int i = scanStart; i < scanEnd; i += step)
                {
                    // An event close to the start of the segment yields a negative window start: skip it
                    // rather than index outside the arrays.
                    if (i < 0)
                    {
                        continue;
                    }

                    // An event close to the end of the segment yields a window that overruns the arrays.
                    // i only increases, so no later window can fit either.
                    if (i + dctLength > scannableLength)
                    {
                        break;
                    }

                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    double period;
                    double intensity;
                    Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out double _, out period, out intensity);

                    bool periodWithinBounds = period > minPeriod && period < maxPeriod;
                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    // lay down score for sample length, keeping the maximum where windows overlap
                    for (int j = 0; j < dctLength; j++)
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event - only events with a strong enough oscillation are kept
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
                    ae.Score_MaxInEvent = maximumIntensity;
                    ae.Profile = lwConfig.ProfileNames[0];
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // LOOK FOR TINK EVENTS
            // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            double minDurationOfTink = lwConfig.MinDurationOfTink; // seconds
            double maxDurationOfTink = lwConfig.MaxDurationOfTink; // seconds

            // want stronger threshold for tink because brief.
            double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
            var predictedTinkEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                tinkDecibelThreshold,
                minDurationOfTink,
                maxDurationOfTink,
                segmentStartOffset);

            foreach (var ae2 in predictedTinkEvents)
            {
                // TODO: prune the list of potential acoustic events, for example using Cosine Similarity.
                // At present every candidate tink is accepted and no Score_MaxInEvent is assigned to it.
                ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";
                ae2.Profile = lwConfig.ProfileNames[1];
                confirmedEvents.Add(ae2);
            }

            //######################################################################

            var scorePlot = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double normalisedThreshold;
                DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

                var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
                debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                // if do not use noise reduction can get a more sensitive recogniser.
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
        } //Analysis()
Ejemplo n.º 15
0
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        ///
        /// Builds a spectrogram of the recording, averages a lower and an upper frequency band per frame,
        /// combines them into a "sum minus difference" score and converts that score into candidate events.
        /// A candidate is confirmed only if an oscillation with period between MinPeriod and MaxPeriod is found
        /// whose intensity reaches the intensity threshold. Confirmed events are named "L.b".
        /// </summary>
        /// <param name="recording">The audio segment to analyse. If null, a message is logged and null is returned.</param>
        /// <param name="sonoConfig">Configuration of the analysis spectrogram. Its WindowSize also fixes the frequency bin width.</param>
        /// <param name="lbConfig">Recognizer parameters: thresholds, band limits, durations, periods and species name.</param>
        /// <param name="drawDebugImage">When true, a debug image containing the score plots is drawn and returned.</param>
        /// <param name="segmentStartOffset">Passed unchanged to the score-to-event conversion.</param>
        /// <returns>
        /// A tuple of: a display spectrogram (window 512, no overlap), the hits matrix (always null in this method),
        /// the per-frame oscillation score array, the confirmed events, and the debug image (null unless requested).
        /// </returns>
        public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaBicolorConfig lbConfig,
            bool drawDebugImage,
            TimeSpan segmentStartOffset)
        {
            double decibelThreshold = lbConfig.DecibelThreshold; //dB
            double intensityThreshold = lbConfig.IntensityThreshold;

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return null;
            }

            // i: MAKE SONOGRAM
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            // NOTE(review): frame rate equals bin width only when frames do not overlap - confirm callers use WindowOverlap = 0.
            double framesPerSecond = freqBinWidth;

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 3 * lbConfig.MaxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            // convert the band limits from Hertz to spectrogram bin indices
            int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);

            // one average value per frame for each of the two bands
            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            double[] amplitudeScores = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

            // Smooth both score arrays with a 7-frame moving average.
            amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lbConfig.LowerBandMinHz,
                lbConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                lbConfig.MinDuration,
                lbConfig.MaxDuration,
                segmentStartOffset);

            // suppress small difference values before searching for oscillations
            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // init the score array
            double[] scores = new double[rowCount];

            // no hits matrix is produced by this recognizer
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List<AcousticEvent>();

            // A DCT window is usable only if it fits inside both the array it reads and the array it writes.
            int scannableLength = Math.Min(differenceScores.Length, scores.Length);
            const int step = 2;

            foreach (var ae in predictedEvents)
            {
                int eventStart = ae.Oblong.RowTop;
                int eventWidth = ae.Oblong.RowWidth;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity.
                // Windows are positioned so that their centres sweep across the event.
                int scanStart = eventStart - (dctLength / 2);
                int scanEnd = eventStart + eventWidth - (dctLength / 2);
                for (int i = scanStart; i < scanEnd; i += step)
                {
                    // An event close to the start of the segment yields a negative window start: skip it
                    // rather than index outside the arrays.
                    if (i < 0)
                    {
                        continue;
                    }

                    // An event close to the end of the segment yields a window that overruns the arrays.
                    // i only increases, so no later window can fit either.
                    if (i + dctLength > scannableLength)
                    {
                        break;
                    }

                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out _, out var period, out var intensity);

                    bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;
                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    // lay down score for sample length, keeping the maximum where windows overlap
                    for (int j = 0; j < dctLength; j++)
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // name the event - only events with a strong enough oscillation are kept
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name = "L.b";
                    ae.Score_MaxInEvent = maximumIntensity;
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################

            // plot of the oscillation intensity scores against the intensity threshold
            var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

            //DEBUG IMAGE this recognizer only. MUST set false for deployment.
            Image debugImage = null;

            if (drawDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeScores, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

                var debugPlots = new List<Plot> { scorePlot, sumDiffPlot, differencePlot };
                debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return new sonogram because it makes for more easy interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                // if do not use noise reduction can get a more sensitive recogniser.
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage);
        } //Analysis()
Ejemplo n.º 16
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// Averages the spectrogram over the configured frequency band for each frame, converts frames whose
        /// band amplitude exceeds a fixed 3 dB threshold into acoustic events, labels the events with the
        /// configured species names, and always writes a debug spectrogram image to the output directory.
        /// </summary>
        /// <param name="recording">The audio segment to analyse.</param>
        /// <param name="configuration">Recognizer configuration; read into a <c>LitoriaNasutaConfig</c>.</param>
        /// <param name="segmentStartOffset">Passed to the event conversion and stored on each event as SegmentStartSeconds.</param>
        /// <param name="getSpectralIndexes">Not used by this recognizer.</param>
        /// <param name="outputDirectory">Directory in which the debug spectrogram image is saved.</param>
        /// <param name="imageWidth">Not used by this recognizer.</param>
        /// <returns>The spectrogram, a null hits matrix, the thresholded amplitude plot and the detected events.</returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            var recognizerConfig = new LitoriaNasutaConfig();
            recognizerConfig.ReadConfigFile(configuration);

            // BETTER TO SET THESE. IGNORE USER!
            // this default framesize seems to work
            const int frameSize = 1024;
            const double windowOverlap = 0.0;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                // if do not use noise reduction can get a more sensitive recogniser.
                NoiseReductionType = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            // convert the band limits from Hertz to spectrogram bin indices
            int minBin = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int maxBin = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var decibelThreshold = 3.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            // one average value per frame over the band of interest
            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            // no hits matrix is produced by this recognizer
            double[,] hits = null;

            // label every event with the species and segment information
            foreach (var ae in acousticEvents)
            {
                ae.SpeciesName = recognizerConfig.SpeciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
            }

            // copy of the amplitude array in which values at or below the threshold are left at zero
            var thresholdedPlot = new double[amplitudeArray.Length];
            for (int x = 0; x < amplitudeArray.Length; x++)
            {
                if (amplitudeArray[x] > decibelThreshold)
                {
                    thresholdedPlot[x] = amplitudeArray[x];
                }
            }

            var maxDb = amplitudeArray.MaxOrDefault();

            DataTools.Normalise(thresholdedPlot, decibelThreshold, out var normalisedScores, out var normalisedThreshold);

            // The interpolated string is used directly: passing it through string.Format would treat it as a
            // format string and throw if the display name ever contained a brace.
            var text = $"{this.DisplayName} (Fullscale={maxDb:f1}dB)";
            var plot = new Plot(text, normalisedScores, normalisedThreshold);

            // display a variety of debug score arrays
            // NOTE(review): the debug image is written unconditionally for every segment - confirm this is intended for deployment.
            DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);
            var debugPlots = new List<Plot> { plot, amplPlot };

            // NOTE: This DrawDebugImage() method can be over-written in this class.
            var debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, hits);
            var debugPath = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
            debugImage.Save(debugPath);

            return new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = acousticEvents,
            };
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            // common properties
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);
            // duration of DCT in seconds
            //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

            // minimum acceptable value of a DCT coefficient
            //double dctThreshold = (double)configuration[AnalysisKeys.DctThreshold];
            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
            double decibelThreshold        = configuration.GetDouble("DecibelThreshold");

            //double minPeriod = (double)configuration["MinPeriod"]; //: 0.18
            //double maxPeriod = (double)configuration["MaxPeriod"]; //

            //int maxOscilRate = (int)Math.Ceiling(1 /minPeriod);
            //int minOscilRate = (int)Math.Floor(1 /maxPeriod);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in second
            var maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            // this default framesize and overlap is best for the White Hrron of Bhutan.
            const int frameSize     = 2048;
            double    windowOverlap = 0.0;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    recordingDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth) + 1;

            /* #############################################################################################################################################
             * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
             * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
             * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
             * 2048     17640      116.1ms           8.6         8.6    7430ms           551hz          1100hz
             * 2048     22050       92.8ms          21.5        10.7666 1472ms
             */

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            // var templates = GetTemplatesForAlgorithm1(14);
            var amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            bool[] peakArray       = new bool[rowCount];
            var    amplitudeScores = new double[rowCount];
            var    hits            = new double[rowCount, colCount];

            const int maxTemplateLength  = 20;
            const int templateEndPadding = 7;
            const int templateOffset     = 14;
            const int minimumGap         = 4;
            const int maximumGap         = 100;

            // first find the amplitude peaks
            for (int j = 1; j < amplitudeArray.Length - 1; j++)
            {
                if (amplitudeArray[j] < decibelThreshold)
                {
                    continue;
                }

                if (amplitudeArray[j] > amplitudeArray[j - 1] && amplitudeArray[j] > amplitudeArray[j + 1])
                {
                    peakArray[j] = true;
                }
            }

            // get template for end of Herron call
            var endTemplate = GetEndTemplateForAlgorithm2();

            // now search for peaks that are the correct distance apart.
            for (int i = 2; i < amplitudeArray.Length - maxTemplateLength - templateEndPadding; i++)
            {
                if (!peakArray[i])
                {
                    continue;
                }

                // calculate distance to next peak
                int distanceToNextPeak = CalculateDistanceToNextPeak(peakArray, i);

                // skip gaps that are too small or too large
                if (distanceToNextPeak < minimumGap || distanceToNextPeak > maximumGap)
                {
                    continue;
                }

                // The herron call ends with a rising whip
                // Check end of call using end template
                if (distanceToNextPeak > maxTemplateLength)
                {
                    int start = i - templateOffset;
                    if (start < 0)
                    {
                        start = 0;
                    }

                    var    endLocality = DataTools.Subarray(amplitudeArray, start, endTemplate.Length);
                    double endScore    = DataTools.CosineSimilarity(endLocality, endTemplate);
                    for (int to = -templateOffset; to < endTemplate.Length - templateOffset; to++)
                    {
                        if (i + to >= 0 && endScore > amplitudeScores[i + to])
                        {
                            amplitudeScores[i + to] = endScore;

                            // hits[i, minBin] = 10;
                        }
                    }

                    for (int k = 2; k < maxTemplateLength; k++)
                    {
                        amplitudeScores[i + k] = 0.0;
                    }

                    continue;
                }

                // Get the start template which depends on distance to next peak.
                var startTemplate = GetTemplateForAlgorithm2(distanceToNextPeak, templateEndPadding);

                // now calculate similarity of locality with the startTemplate
                var    locality = DataTools.Subarray(amplitudeArray, i - 2, startTemplate.Length); // i-2 because first two places should be zero.
                double score    = DataTools.CosineSimilarity(locality, startTemplate);
                for (int t = 0; t < startTemplate.Length; t++)
                {
                    if (score > amplitudeScores[i + t])
                    {
                        amplitudeScores[i + t] = score;
                        hits[i, minBin]        = 10;
                    }
                }
            } // loop over peak array

            var smoothedScores = DataTools.filterMovingAverageOdd(amplitudeScores, 3);

            // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                smoothedScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in predictedEvents)
            {
                if (ae.EventDurationSeconds < minDuration)
                {
                    continue;
                }

                // add additional info
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // do a recognizer test.
            //CompareArrayWithBenchmark(scores, new FileInfo(recording.FilePath));
            //CompareArrayWithBenchmark(prunedEvents, new FileInfo(recording.FilePath));

            var plot = new Plot(this.DisplayName, amplitudeScores, eventThreshold);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = prunedEvents,
            });
        }