Beispiel #1
0
        } // GetSpectralMaxima()

        /// <summary>
        /// Per the original author's note, this method is called only from the Frogs class.
        /// For every frame lying within <paramref name="nhLimit"/> frames of an envelope peak, finds the frequency bin
        /// that holds the maximum amplitude. Frames that are not in the neighbourhood of any envelope peak are not examined.
        /// </summary>
        /// <param name="decibelsPerFrame">Energy envelope used to locate peaks; indexed by the same frame number as the rows of <paramref name="spectrogram"/>.</param>
        /// <param name="spectrogram">Matrix whose rows are frames and whose columns are frequency bins.</param>
        /// <param name="threshold">Envelope peaks below this value are skipped, and a spectral maximum is only recorded when it is strictly greater than this value.</param>
        /// <param name="nhLimit">Number of frames examined on EACH side of an envelope peak. Peaks closer than this to either end of the spectrogram are skipped.</param>
        /// <returns>
        /// Item1: array with one element per frame holding the index of the bin with maximum amplitude (0 where nothing was recorded).
        /// Item2: matrix with the same dimensions as <paramref name="spectrogram"/> holding 1.0 at every recorded (frame, bin) and 0.0 elsewhere.
        /// </returns>
        public static Tuple<int[], double[,]> GetSpectralMaxima(double[] decibelsPerFrame, double[,] spectrogram, double threshold, int nhLimit)
        {
            int rowCount = spectrogram.GetLength(0);
            int colCount = spectrogram.GetLength(1);

            // presumably returns an array parallel to decibelsPerFrame that is non-zero only at local peaks - TODO confirm against DataTools
            var peaks = DataTools.GetPeakValues(decibelsPerFrame);

            var maxFreqArray = new int[rowCount]; // one element per frame: index of the freq bin having max amplitude.
            var hitsMatrix = new double[rowCount, colCount];

            // The margins keep r - nhLimit and r + nhLimit inside the matrix.
            for (int r = nhLimit; r < rowCount - nhLimit; r++)
            {
                if (peaks[r] < threshold)
                {
                    continue;
                }

                // Examine the symmetric neighbourhood [r - nhLimit, r + nhLimit]. The upper bound is inclusive so that
                // the frame at +nhLimit is not silently dropped and so that nhLimit == 0 still examines the peak frame itself.
                for (int nh = -nhLimit; nh <= nhLimit; nh++)
                {
                    int frame = r + nh;

                    // assumes GetRow returns a copy, so zeroing the DC bin does not alter the spectrogram - TODO confirm
                    double[] spectrum = MatrixTools.GetRow(spectrogram, frame);
                    spectrum[0] = 0.0; // set DC = 0.0 just in case it is max.

                    int maxFreqBin = DataTools.GetMaxIndex(spectrum);

                    // only record the spectral peak if it is above threshold.
                    if (spectrum[maxFreqBin] > threshold)
                    {
                        maxFreqArray[frame] = maxFreqBin;
                        hitsMatrix[frame, maxFreqBin] = 1.0;
                    }
                }
            }

            return Tuple.Create(maxFreqArray, hitsMatrix);
        } // GetSpectralMaxima()
//Analyze()

        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// Builds a spectrogram of the audio segment (with "STANDARD" noise reduction), extracts spectral tracks from it,
        /// scores each surviving track for periodicity, converts the tracks to acoustic events and finally names each
        /// event by looking it up in the frog-call table.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">Audio segment to analyse; it is opened through its full path.</param>
        /// <param name="analysisSettings">Supplies the configuration dictionary. Only the entry giving the path of the frog-data CSV file is read here.</param>
        /// <param name="originalSampleRate">Sample rate of the original recording. Not used by the analysis; kept so that existing callers continue to compile.</param>
        /// <param name="segmentStartOffset">Start offset of this segment; passed on when the tracks are converted to events.</param>
        /// <returns>Tuple of (sonogram, hits matrix, score plots, frog events, duration of the recording).</returns>
        public static Tuple<BaseSonogram, double[,], List<Plot>, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, AnalysisSettings analysisSettings, int originalSampleRate, TimeSpan segmentStartOffset)
        {
            Dictionary<string, string> configDict = analysisSettings.ConfigDict;

            // Fixed analysis parameters - values set by the user are deliberately ignored.
            int frameSize = 32;
            double windowOverlap = 0.3;
            int xCorrelationLength = 256; // for Xcorrelation - 256 frames @801 = 320ms, almost 1/3 second.
            double dBThreshold = 12.0;
            int nhLimit = 3;              // limit of neighbourhood around maximum
            int herzOffset = 0;
            int maxFreq = 6000;           // upper frequency bound (Hz) for both track extraction and the score plots
            double severity = 0.5;
            double dynamicRange = 60;     // deciBels above background noise. BG noise has already been removed from each bin.

            // read the file containing parameters of frog calls to a table
            var dt = CsvTools.ReadCSVToTable(configDict[key_FROG_DATA], true);

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                SourceFName = recording.BaseName,
                WindowSize = frameSize,
                WindowOverlap = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"), // must do noise removal
            };

            TimeSpan recordingDuration = recording.Duration;
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            double frameOffset = sonoConfig.GetFrameOffset(sr);
            double framesPerSecond = 1 / frameOffset;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            //ii: GET TRACKS
            var maxima = SpectralTrack.GetSpectralMaxima(sonogram.DecibelsPerFrame, sonogram.Data, dBThreshold, nhLimit);
            var maxFreqArray = maxima.Item1; // array (one element per frame) indicating which freq bin has max amplitude.
            var hitsMatrix = maxima.Item2;
            var tracks = SpectralTrack.GetSpectralTracks(maxFreqArray, framesPerSecond, freqBinWidth, herzOffset, SpectralTrack.MIN_TRACK_DURATION, SpectralTrack.MAX_INTRASYLLABLE_GAP, maxFreq);

            // convert sonogram to a list of frequency bin arrays
            var listOfFrequencyBins = SpectrogramTools.Sonogram2ListOfFreqBinArrays(sonogram, dynamicRange);
            int minFrameLength = SpectralTrack.FrameCountEquivalent(SpectralTrack.MIN_TRACK_DURATION, framesPerSecond);

            // Crop every track and discard those that end up too short. Iterating backwards keeps the
            // remaining indices valid, and removing by index guarantees that exactly this track is removed.
            for (int i = tracks.Count - 1; i >= 0; i--)
            {
                tracks[i].CropTrack(listOfFrequencyBins, severity);
                if (tracks[i].Length < minFrameLength)
                {
                    tracks.RemoveAt(i);
                }
            }

            // find any periodicity in each track and calculate its score.
            foreach (SpectralTrack track in tracks)
            {
                SpectralTrack.DetectTrackPeriodicity(track, xCorrelationLength, listOfFrequencyBins, sonogram.FramesPerSecond);
            }

            int rowCount = sonogram.Data.GetLength(0);
            int topBin = (int)Math.Round(maxFreq / freqBinWidth);
            var plots = CreateScorePlots(tracks, rowCount, topBin);

            //iii: CONVERT TRACKS TO ACOUSTIC EVENTS
            List<AcousticEvent> frogEvents = SpectralTrack.ConvertTracks2Events(tracks, segmentStartOffset);

            //iv: GET FROG IDs
            foreach (AcousticEvent ae in frogEvents)
            {
                // NOTE(review): a zero Periodicity makes this rate infinite (or throws if Periodicity is an integral type)
                // - confirm that ClassifyFrogEvent copes with that.
                double oscRate = 1 / ae.Periodicity;
                string[] names = ClassifyFrogEvent(ae.DominantFreq, oscRate, dt);
                ae.Name = names[0];
                ae.Name2 = names[1];
            }

            return Tuple.Create(sonogram, hitsMatrix, plots, frogEvents, recordingDuration);
        } //Analysis()
        /// <summary>
        /// THIS IS THE CORE DETECTION METHOD
        /// Detects the human voice: builds a spectrogram of the segment, looks for harmonic structure in a band of 64
        /// frequency bins above MIN_HZ, keeps only frames whose harmonic spacing and dominant frequency are acceptable,
        /// and converts the resulting score array to acoustic events.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">Audio segment to analyse; it is opened through its full path.</param>
        /// <param name="configDict">
        /// Analysis configuration. Must contain MIN_HZ, MIN_FORMANT_GAP, MAX_FORMANT_GAP, INTENSITY_THRESHOLD, MIN_DURATION
        /// and MAX_DURATION. The frame-length entry is optional and defaults to 1024. Numbers are parsed with the invariant culture.
        /// </param>
        /// <param name="segmentStartOffset">Start offset of this segment; passed on when the score array is converted to events.</param>
        /// <returns>Tuple of (sonogram, hits matrix, plot of the intensity array, predicted events, duration of the recording).</returns>
        public static Tuple<BaseSonogram, double[,], Plot, List<AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary<string, string> configDict, TimeSpan segmentStartOffset)
        {
            // Configuration values are machine-readable text, so they are parsed independently of the current culture
            // (otherwise "0.5" is misread on machines that use a decimal comma).
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            //set default values
            int frameLength = 1024;
            string frameLengthText;
            if (configDict.TryGetValue(AnalysisKeys.FrameLength, out frameLengthText))
            {
                frameLength = int.Parse(frameLengthText, invariant);
            }

            double windowOverlap = 0.0;

            int minHz = int.Parse(configDict["MIN_HZ"], invariant);
            int minFormantgap = int.Parse(configDict["MIN_FORMANT_GAP"], invariant);
            int maxFormantgap = int.Parse(configDict["MAX_FORMANT_GAP"], invariant);
            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"], invariant); //in 0-1
            double minDuration = double.Parse(configDict["MIN_DURATION"], invariant);               // seconds
            double maxDuration = double.Parse(configDict["MAX_DURATION"], invariant);               // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                //default values config
                SourceFName = recording.BaseName,
                WindowSize = frameLength,
                WindowOverlap = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            var recordingDuration = recording.Duration;
            int sr = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int numberOfBins = 64;
            int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin = minBin + numberOfBins - 1;
            int maxHz = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int rowCount = sonogram.Data.GetLength(0);
            int colCount = sonogram.Data.GetLength(1);

            // Restrict the search for harmonics to the band of bins minBin..maxbin, over all frames.
            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
            var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity = results.Item1;
            double[] periodicity = results.Item2; //an array of periodicity scores

            // Only the band 100 Hz - 3000 Hz is searched for the strongest spectral peaks;
            // bins outside that band are zeroed before the search (0-100 Hz holds too much noise).
            int lowerHumanMaxBound = (int)(100 / freqBinWidth);
            int upperHumanMaxBound = (int)(3000 / freqBinWidth);

            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                if (intensity[r] < intensityThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find freq having max power and use info to adjust score.
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < lowerHumanMaxBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
                {
                    spectrum[j] = 0.0;
                }

                // Average the bin indices of the two strongest peaks that remain in the band.
                double[] peakvalues = DataTools.GetPeakValues(spectrum);
                int maxIndex1 = DataTools.GetMaxIndex(peakvalues);
                peakvalues[maxIndex1] = 0.0; // knock out the strongest peak so the next search finds the runner-up
                int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
                int avMaxBin = (maxIndex1 + maxIndex2) / 2;

                // The score survives only when that averaged frequency lies within 500 - 1000 Hz (inclusive).
                int freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);
                double discount = (freqWithMaxPower > 1000 || freqWithMaxPower < 500) ? 0.0 : 1.0;

                //set scoreArray[r]  - ignore locations with low intensity
                if (intensity[r] > intensityThreshold)
                {
                    scoreArray[r] = intensity[r] * discount;
                }
            }

            //transfer info to a hits matrix.
            var hits = new double[rowCount, colCount];
            double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                // formant gap expressed relative to the permitted maximum gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                double relativePeriod = (herzPeriod - minFormantgap) / maxFormantgap;

                // NOTE(review): the top bin (maxbin) is not marked although it was part of the analysed band - confirm this is intended.
                for (int c = minBin; c < maxbin; c++)
                {
                    hits[r, c] = relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            //remove isolated speech events - expect humans to talk like politicians
            //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
            Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);

            return Tuple.Create(sonogram, hits, plot, predictedEvents, recordingDuration);
        } //Analysis()