Example #1
0
        public double[] GetLogPsd()
        {
            var psd    = MatrixTools.GetColumnAverages(this.Data);
            var logPsd = DataTools.LogValues(psd);

            return(logPsd);
        }
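Every example on this page hinges on MatrixTools.GetColumnAverages, which, judging from the call sites, takes a double[,] matrix and returns one mean value per column. The snippet below is a minimal stand-alone sketch of that behaviour, written for illustration only; it is not the library's actual implementation, and the sketch name is invented.

        // Illustrative sketch only: averages each column of a 2D matrix (one value per column).
        public static double[] GetColumnAveragesSketch(double[,] matrix)
        {
            int rowCount = matrix.GetLength(0);
            int colCount = matrix.GetLength(1);
            var averages = new double[colCount];

            for (int c = 0; c < colCount; c++)
            {
                double sum = 0.0;
                for (int r = 0; r < rowCount; r++)
                {
                    sum += matrix[r, c];
                }

                averages[c] = sum / rowCount;
            }

            return averages;
        }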
Example #2
0
        public void GetPsd(string path)
        {
            var psd = MatrixTools.GetColumnAverages(this.Data);

            FileTools.WriteArray2File(psd, path + ".csv");
            GraphsAndCharts.DrawGraph(psd, "Title", new FileInfo(path));

            //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path));
            //image.Save(path);
        }
Example #3
0
        public void DrawLogPsd(string path)
        {
            var psd    = MatrixTools.GetColumnAverages(this.Data);
            var logPsd = DataTools.LogValues(psd);

            FileTools.WriteArray2File(logPsd, path + ".csv");
            GraphsAndCharts.DrawGraph(logPsd, "log PSD", new FileInfo(path));

            //GraphsAndCharts.DrawGraph("Title", psd, width, height, 4 new FileInfo(path));
            //image.Save(path, ImageFormat.Png);
        }
Example #4
0
        public static Dictionary <string, double[]> AverageIndicesOverMinutes(Dictionary <string, double[, ]> allIndices, int startRowId, int endRowId)
        {
            var opIndices = new Dictionary <string, double[]>();

            var keys = allIndices.Keys;

            foreach (string key in keys)
            {
                var success = allIndices.TryGetValue(key, out double[,] matrix);
                if (success)
                {
                    var colCount  = matrix.GetLength(1);
                    var subMatrix = MatrixTools.Submatrix(matrix, startRowId, 0, endRowId, colCount - 1);
                    opIndices.Add(key, MatrixTools.GetColumnAverages(subMatrix));
                }
            }

            return(opIndices);
        }
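To make the row-range semantics of AverageIndicesOverMinutes concrete, here is a hypothetical call. The index names, matrix sizes and row range are invented for illustration, and MatrixTools.Submatrix is assumed to use inclusive row bounds, as the call above suggests.

        // Sketch: average the first ten one-minute rows of each spectral-index matrix.
        public static void AverageIndicesOverMinutesExample()
        {
            var allIndices = new Dictionary<string, double[,]>
            {
                { "ACI", new double[60, 256] }, // 60 minutes x 256 freq bins, dummy values
                { "ENT", new double[60, 256] },
            };

            Dictionary<string, double[]> averaged = AverageIndicesOverMinutes(allIndices, startRowId: 0, endRowId: 9);

            // each entry now holds one vector of column averages (length 256) for the selected rows
            Console.WriteLine(averaged["ACI"].Length);
        }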
Example #5
0
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        /// <param name="dictionaryOfHiResSpectralIndices">The high-resolution spectral indices, keyed by index name.</param>
        /// <param name="recording">The segment of the source file.</param>
        /// <param name="configDict">The config dictionary.</param>
        /// <param name="analysisSettings">The analysis settings.</param>
        /// <param name="segmentSettings">The segment settings.</param>
        /// <returns>
        /// The <see cref="LimnodynastesConvexResults"/>.
        /// </returns>
        internal static LimnodynastesConvexResults Analysis(
            Dictionary <string, double[, ]> dictionaryOfHiResSpectralIndices,
            AudioRecording recording,
            Dictionary <string, string> configDict,
            AnalysisSettings analysisSettings,
            SegmentSettingsBase segmentSettings)
        {
            // for Limnodynastes convex, in the D.Stewart CD, there are peaks close to:
            //1. 1950 Hz
            //2. 1460 hz
            //3.  970 hz    These are 490 Hz apart.
            // for Limnodynastes convex, in the JCU recording, there are peaks close to:
            //1. 1780 Hz
            //2. 1330 hz
            //3.  880 hz    These are 450 Hz apart.

            // So the strategy is to look for three peaks separated by the same amount and in the vicinity of the above,
            //  starting with the highest power (the top peak) and working down to the lowest power (bottom peak).

            var      outputDir          = segmentSettings.SegmentOutputDirectory;
            TimeSpan segmentStartOffset = segmentSettings.SegmentStartOffset;

            //KeyValuePair<string, double[,]> kvp = dictionaryOfHiResSpectralIndices.First();
            var spg         = dictionaryOfHiResSpectralIndices["RHZ"];
            int rhzRowCount = spg.GetLength(0);
            int rhzColCount = spg.GetLength(1);

            int    sampleRate        = recording.SampleRate;
            double herzPerBin        = sampleRate / 2 / (double)rhzRowCount;
            // fall back to the default values when a key is absent from the config dictionary
            double scoreThreshold    = configDict.ContainsKey("EventThreshold") ? double.Parse(configDict["EventThreshold"]) : 3.0;
            int    minimumFrequency  = configDict.ContainsKey("MinHz") ? int.Parse(configDict["MinHz"]) : 850;
            int    dominantFrequency = configDict.ContainsKey("DominantFrequency") ? int.Parse(configDict["DominantFrequency"]) : 1850;

            // # The Limnodynastes call has three major peaks. The dominant peak is at 1850 or as set above.
            // # The second and third peaks are at equal gaps below: DominantFreq-gap and DominantFreq-(2*gap).
            // # Set the gap in the Config file. Should typically be in range 880 to 970
            int peakGapInHerz = configDict.ContainsKey("PeakGap") ? int.Parse(configDict["PeakGap"]) : 470;
            int F1AndF2Gap    = (int)Math.Round(peakGapInHerz / herzPerBin);
            //int F1AndF2Gap = 10; // 10 = number of freq bins
            int F1AndF3Gap = 2 * F1AndF2Gap;
            //int F1AndF3Gap = 20;

            int hzBuffer       = 250;
            int bottomBin      = 5;
            int dominantBin    = (int)Math.Round(dominantFrequency / herzPerBin);
            int binBuffer      = (int)Math.Round(hzBuffer / herzPerBin);
            int dominantBinMin = dominantBin - binBuffer;
            int dominantBinMax = dominantBin + binBuffer;

            //  freqBin + rowID = binCount - 1;
            // therefore: rowID = binCount - freqBin - 1;
            int minRowID  = rhzRowCount - dominantBinMax - 1;
            int maxRowID  = rhzRowCount - dominantBinMin - 1;
            int bottomRow = rhzRowCount - bottomBin - 1;
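            // Worked example of the bookkeeping above (assumed values, for illustration only):
            // with sampleRate = 22050 and rhzRowCount = 256, herzPerBin = 11025 / 256 ≈ 43.07 Hz.
            // Then dominantBin = round(1850 / 43.07) = 43 and binBuffer = round(250 / 43.07) = 6,
            // so dominantBinMin = 37 and dominantBinMax = 49,
            // giving minRowID = 256 - 49 - 1 = 206, maxRowID = 256 - 37 - 1 = 218 and bottomRow = 250.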

            var list = new List <Point>();

            // loop through all spectra/columns of the hi-res spectrogram.
            for (int c = 1; c < rhzColCount - 1; c++)
            {
                double maxAmplitude            = -double.MaxValue;
                int    idOfRowWithMaxAmplitude = 0;

                for (int r = minRowID; r <= bottomRow; r++)
                {
                    if (spg[r, c] > maxAmplitude)
                    {
                        maxAmplitude            = spg[r, c];
                        idOfRowWithMaxAmplitude = r;
                    }
                }

                if (idOfRowWithMaxAmplitude < minRowID)
                {
                    continue;
                }
                if (idOfRowWithMaxAmplitude > maxRowID)
                {
                    continue;
                }

                // want a spectral peak.
                if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c - 1])
                {
                    continue;
                }
                if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c + 1])
                {
                    continue;
                }
                // peak should exceed threshold amplitude
                if (spg[idOfRowWithMaxAmplitude, c] < 3.0)
                {
                    continue;
                }

                // convert row ID to freq bin ID
                int freqBinID = rhzRowCount - idOfRowWithMaxAmplitude - 1;
                list.Add(new Point(c, freqBinID));
                // we now have a list of potential hits for LimCon. This needs to be filtered.

                // Console.WriteLine("Col {0}, Bin {1}  ", c, freqBinID);
            }

            // DEBUG ONLY // ################################ TEMPORARY ################################
            // superimpose point on RHZ HiRes spectrogram for debug purposes
            bool drawOnHiResSpectrogram = true;
            //string filePath = @"G:\SensorNetworks\Output\Frogs\TestOfHiResIndices-2016July\Test\Towsey.HiResIndices\SpectrogramImages\3mile_creek_dam_-_Herveys_Range_1076_248366_20130305_001700_30_0min.CombinedGreyScale.png";
            var    fileName   = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
            string filePath   = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScale.png";
            var    debugImage = new FileInfo(filePath);

            if (!debugImage.Exists)
            {
                drawOnHiResSpectrogram = false;
            }
            if (drawOnHiResSpectrogram)
            {
                // put red dot where max is
                Bitmap bmp = new Bitmap(filePath);
                foreach (Point point in list)
                {
                    bmp.SetPixel(point.X + 70, 1911 - point.Y, Color.Red);
                }
                // mark off every tenth frequency bin
                for (int r = 0; r < 26; r++)
                {
                    bmp.SetPixel(68, 1911 - (r * 10), Color.Blue);
                    bmp.SetPixel(69, 1911 - (r * 10), Color.Blue);
                }
                // mark off upper bound and lower frequency bound
                bmp.SetPixel(69, 1911 - dominantBinMin, Color.Lime);
                bmp.SetPixel(69, 1911 - dominantBinMax, Color.Lime);
                //bmp.SetPixel(69, 1911 - maxRowID, Color.Lime);
                string opFilePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScaleAnnotated.png";
                bmp.Save(opFilePath);
            }
            // END DEBUG ################################ TEMPORARY ################################

            // now construct the standard decibel spectrogram WITHOUT noise removal, and look for LimConvex
            // get frame parameters for the analysis
            double epsilon   = Math.Pow(0.5, recording.BitsPerSample - 1);
            int    frameSize = rhzRowCount * 2;
            int    frameStep = frameSize; // this default = zero overlap
            double frameDurationInSeconds = frameSize / (double)sampleRate;
            double frameStepInSeconds     = frameStep / (double)sampleRate;
            double framesPerSec           = 1 / frameStepInSeconds;
            //var dspOutput = DSP_Frames.ExtractEnvelopeAndFFTs(recording, frameSize, frameStep);
            //// Generate deciBel spectrogram
            //double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.amplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);

            // i: Init SONOGRAM config
            var sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameSize,
                WindowOverlap      = 0.0,
                NoiseReductionType = NoiseReductionType.None,
            };
            // init sonogram
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // remove the DC row of the spectrogram
            sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
            //scores.Add(new Plot("Decibels", DataTools.NormaliseMatrixValues(dBArray), ActivityAndCover.DefaultActivityThresholdDb));
            //scores.Add(new Plot("Active Frames", DataTools.Bool2Binary(activity.activeFrames), 0.0));

            // convert spectral peaks to frequency
            //var tuple_DecibelPeaks = SpectrogramTools.HistogramOfSpectralPeaks(deciBelSpectrogram);
            //int[] peaksBins = tuple_DecibelPeaks.Item2;
            //double[] freqPeaks = new double[peaksBins.Length];
            //int binCount = sonogram.Data.GetLength(1);
            //for (int i = 1; i < peaksBins.Length; i++) freqPeaks[i] = (lowerBinBound + peaksBins[i]) / (double)nyquistBin;
            //scores.Add(new Plot("Max Frequency", freqPeaks, 0.0));  // location of peaks for spectral images

            // create new list of LimCon hits in the standard spectrogram.
            double timeSpanOfFrameInSeconds = frameSize / (double)sampleRate;
            var    newList     = new List <int[]>();
            int    lastFrameID = sonogram.Data.GetLength(0) - 1;
            int    lastBinID   = sonogram.Data.GetLength(1) - 1;

            foreach (Point point in list)
            {
                double secondsFromStartOfSegment = (point.X * 0.1) + 0.05; // convert point.X to the centre of its time-block.
                int    framesFromStartOfSegment  = (int)Math.Round(secondsFromStartOfSegment / timeSpanOfFrameInSeconds);

                // location of max point is uncertain, so search in neighbourhood.
                // NOTE: sonogram.data matrix is time*freqBin
                double maxValue = -double.MaxValue;
                int    idOfTMax = framesFromStartOfSegment;
                int    idOfFMax = point.Y;
                for (int deltaT = -4; deltaT <= 4; deltaT++)
                {
                    for (int deltaF = -1; deltaF <= 1; deltaF++)
                    {
                        int newT = framesFromStartOfSegment + deltaT;
                        if (newT < 0)
                        {
                            newT = 0;
                        }
                        else if (newT > lastFrameID)
                        {
                            newT = lastFrameID;
                        }

                        // clamp the frequency index too, so we never read outside the matrix
                        int newF = Math.Min(Math.Max(point.Y + deltaF, 0), lastBinID);

                        double value = sonogram.Data[newT, newF];
                        if (value > maxValue)
                        {
                            maxValue = value;
                            idOfTMax = newT;
                            idOfFMax = newF;
                        }
                    }
                }

                // newList.Add(new Point(frameSpan, point.Y));
                int[] array = new int[2];
                array[0] = idOfTMax;
                array[1] = idOfFMax;
                newList.Add(array);
            }

            // Now examine more of the spectrogram to see whether there are peaks at two other places characteristic of Limnodynastes convex.
            // In the D.Stewart CD, there are peaks close to:
            //1. 1950 Hz
            //2. 1460 hz
            //3.  970 hz    These are 490 Hz apart.
            // For Limnodynastes convex, in the JCU recording, there are peaks close to:
            //1. 1780 Hz
            //2. 1330 hz
            //3.  880 hz    These are 450 Hz apart.

            // So the strategy is to look for three peaks separated by the same amount and in the vicinity of the above,
            //  starting with the highest power (the top peak) and working down to the lowest power (bottom peak).
            // We have found the top/highest peak - now find the other two.
            int secondDominantFrequency = 1380;
            int secondDominantBin       = (int)Math.Round(secondDominantFrequency / herzPerBin);
            int thirdDominantFrequency  = 900;
            int thirdDominantBin        = (int)Math.Round(thirdDominantFrequency / herzPerBin);

            var acousticEvents = new List <AcousticEvent>();
            int Tbuffer        = 2;

            // First extract a sub-matrix.
            foreach (int[] array in newList)
            {
                // NOTE: sonogram.data matrix is time*freqBin
                int Tframe = array[0];
                int F1bin  = array[1];
                double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - Tbuffer, 0, Tframe + Tbuffer, F1bin);
                double F1power = subMatrix[Tbuffer, F1bin];
                // convert to vector
                var spectrum = MatrixTools.GetColumnAverages(subMatrix);

                // use the following code to get estimate of background noise
                double[,] powerMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - 3, 10, Tframe + 3, F1bin);
                double averagePower = (MatrixTools.GetRowAverages(powerMatrix)).Average();
                double score        = F1power - averagePower;

                // debug - checking what the spectrum looks like.
                //for (int i = 0; i < 18; i++)
                //    spectrum[i] = -100.0;
                //DataTools.writeBarGraph(spectrum);

                // locate the peaks in lower frequency bands, F2 and F3
                bool[] peaks = DataTools.GetPeaks(spectrum);

                int    F2bin   = 0;
                double F2power = -200.0; // dB
                for (int i = -3; i <= 2; i++)
                {
                    int bin = F1bin - F1AndF2Gap + i;
                    if ((peaks[bin]) && (F2power < subMatrix[1, bin]))
                    {
                        F2bin   = bin;
                        F2power = subMatrix[1, bin];
                    }
                }
                if (F2bin == 0)
                {
                    continue;
                }
                if (F2power == -200.0)
                {
                    continue;
                }
                score += (F2power - averagePower);

                int    F3bin   = 0;
                double F3power = -200.0;
                for (int i = -5; i <= 2; i++)
                {
                    int bin = F1bin - F1AndF3Gap + i;
                    if ((peaks[bin]) && (F3power < subMatrix[1, bin]))
                    {
                        F3bin   = bin;
                        F3power = subMatrix[1, bin];
                    }
                }
                if (F3bin == 0)
                {
                    continue;
                }
                if (F3power == -200.0)
                {
                    continue;
                }

                score += (F3power - averagePower);
                score /= 3;

                // ignore events where SNR < decibel threshold
                if (score < scoreThreshold)
                {
                    continue;
                }

                // ignore events with wrong power distribution. A good LimnoConvex call has strongest F1 power
                if ((F3power > F1power) || (F2power > F1power))
                {
                    continue;
                }

                //freq Bin ID must be converted back to Matrix row ID
                //  freqBin + rowID = binCount - 1;
                // therefore: rowID = binCount - freqBin - 1;
                minRowID = rhzRowCount - F1bin - 2;
                maxRowID = rhzRowCount - F3bin - 1;
                int F1RowID = rhzRowCount - F1bin - 1;
                int F2RowID = rhzRowCount - F2bin - 1;
                int F3RowID = rhzRowCount - F3bin - 1;

                int    maxfreq             = dominantFrequency + hzBuffer;
                int    topBin              = (int)Math.Round(maxfreq / herzPerBin);
                int    frameCount          = 4;
                double duration            = frameCount * frameStepInSeconds;
                double startTimeWrtSegment = (Tframe - 2) * frameStepInSeconds;

                // Got to here so start initialising an acoustic event
                var ae = new AcousticEvent(segmentStartOffset, startTimeWrtSegment, duration, minimumFrequency, maxfreq);
                ae.SetTimeAndFreqScales(framesPerSec, herzPerBin);
                //var ae = new AcousticEvent(oblong, recording.Nyquist, binCount, frameDurationInSeconds, frameStepInSeconds, frameCount);
                //ae.StartOffset = TimeSpan.FromSeconds(Tframe * frameStepInSeconds);

                var pointF1 = new Point(2, topBin - F1bin);
                var pointF2 = new Point(2, topBin - F2bin);
                var pointF3 = new Point(2, topBin - F3bin);
                ae.Points = new List <Point>();
                ae.Points.Add(pointF1);
                ae.Points.Add(pointF2);
                ae.Points.Add(pointF3);
                //tried using HitElements but did not do what I wanted later on.
                //ae.HitElements = new HashSet<Point>();
                //ae.HitElements = new SortedSet<Point>();
                //ae.HitElements.Add(pointF1);
                //ae.HitElements.Add(pointF2);
                //ae.HitElements.Add(pointF3);
                ae.Score = score;
                //ae.MinFreq = Math.Round((topBin - F3bin - 5) * herzPerBin);
                //ae.MaxFreq = Math.Round(topBin * herzPerBin);
                acousticEvents.Add(ae);
            }

            // now add in extra common info to the acoustic events
            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = configDict[AnalysisKeys.SpeciesName];
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recording.Duration.TotalSeconds;
                ae.Name         = abbreviatedName;
                ae.BorderColour = Color.Red;
                ae.FileName     = recording.BaseName;
            });

            double[] scores = new double[rhzColCount];          // predefinition of score array
            double   nomalisationConstant = scoreThreshold * 4; // four times the score threshold
            double   compressionFactor    = rhzColCount / (double)sonogram.Data.GetLength(0);

            foreach (AcousticEvent ae in acousticEvents)
            {
                ae.ScoreNormalised = ae.Score / nomalisationConstant;
                if (ae.ScoreNormalised > 1.0)
                {
                    ae.ScoreNormalised = 1.0;
                }
                int frameID      = (int)Math.Round(ae.EventStartSeconds / frameDurationInSeconds);
                int hiresFrameID = (int)Math.Floor(frameID * compressionFactor);
                scores[hiresFrameID] = ae.ScoreNormalised;
            }
            var plot = new Plot(AnalysisName, scores, scoreThreshold);

            // DEBUG ONLY ################################ TEMPORARY ################################
            // Draw a standard spectrogram and mark off hits etc.
            bool createStandardDebugSpectrogram = true;

            var imageDir = new DirectoryInfo(outputDir.FullName + @"\SpectrogramImages");

            if (!imageDir.Exists)
            {
                imageDir.Create();
            }
            if (createStandardDebugSpectrogram)
            {
                var    fileName2 = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
                string filePath2 = Path.Combine(imageDir.FullName, fileName2 + ".Spectrogram.png");
                Bitmap sonoBmp   = (Bitmap)sonogram.GetImage();
                int    height    = sonoBmp.Height;
                foreach (AcousticEvent ae in acousticEvents)
                {
                    ae.DrawEvent(sonoBmp);
                    //g.DrawRectangle(pen, ob.ColumnLeft, ob.RowTop, ob.ColWidth-1, ob.RowWidth);
                    //ae.DrawPoint(sonoBmp, ae.HitElements.[0], Color.OrangeRed);
                    //ae.DrawPoint(sonoBmp, ae.HitElements[1], Color.Yellow);
                    //ae.DrawPoint(sonoBmp, ae.HitElements[2], Color.Green);
                    ae.DrawPoint(sonoBmp, ae.Points[0], Color.OrangeRed);
                    ae.DrawPoint(sonoBmp, ae.Points[1], Color.Yellow);
                    ae.DrawPoint(sonoBmp, ae.Points[2], Color.LimeGreen);
                }

                // draw the original hits on the standard sonogram
                foreach (int[] array in newList)
                {
                    sonoBmp.SetPixel(array[0], height - array[1], Color.Cyan);
                }

                // mark off every tenth frequency bin on the standard sonogram
                for (int r = 0; r < 20; r++)
                {
                    sonoBmp.SetPixel(0, height - (r * 10) - 1, Color.Blue);
                    sonoBmp.SetPixel(1, height - (r * 10) - 1, Color.Blue);
                }
                // mark off upper bound and lower frequency bound
                sonoBmp.SetPixel(0, height - dominantBinMin, Color.Lime);
                sonoBmp.SetPixel(0, height - dominantBinMax, Color.Lime);
                sonoBmp.Save(filePath2);
            }
            // END DEBUG ################################ TEMPORARY ################################

            return(new LimnodynastesConvexResults
            {
                Sonogram = sonogram,
                Hits = null,
                Plot = plot,
                Events = acousticEvents,
                RecordingDuration = recording.Duration,
            });
        } // Analysis()
Example #6
0
        /// <summary>
        /// Calculate summary statistics for supplied temporal and spectral targets.
        /// </summary>
        /// <remarks>
        /// The acoustic statistics calculated in this method are based on methods outlined in
        /// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
        /// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
        /// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
        /// ..
        /// The Briggs features are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
        /// 1. Gini Index for frame and bin sums. A measure of dispersion. The problem with the Gini index is that its value depends on the row or column count.
        ///    We use entropy instead because, being normalised, its value does not depend on the row or column count.
        /// For the following measures of k-central moments, the freq and time values are normalised to [0,1] over the width of the event.
        /// 2. freq-mean
        /// 3. freq-variance
        /// 4. freq-skew and kurtosis
        /// 5. time-mean
        /// 6. time-variance
        /// 7. time-skew and kurtosis
        /// 8. freq-max (normalized)
        /// 9. time-max (normalized)
        /// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
        /// ...
        /// NOTE 1: There are differences between our method of noise reduction and Briggs'. Briggs does not convert to decibels
        /// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
        /// NOTE 2: To normalise for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
        ///         There are advantages and disadvantages to Briggs' method versus ours. In our case, we have to convert decibel values back to
        ///         energy values when calculating the statistics for the extracted acoustic event.
        /// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
        ///         Only mean and standard deviation.
        /// ..
        /// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
        /// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
        /// BUT not so long as to cause memory constipation.
        /// </remarks>
        /// <param name="recording">as type AudioRecording which contains the event</param>
        /// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording</param>
        /// <param name="spectralTarget">both bottom and top bounds in Hertz</param>
        /// <param name="config">parameters that determine the outcome of the analysis</param>
        /// <param name="segmentStartOffset">How long since the start of the recording this event occurred</param>
        /// <returns>an instance of EventStatistics</returns>
        public static EventStatistics AnalyzeAudioEvent(
            AudioRecording recording,
            Range <TimeSpan> temporalTarget,
            Range <double> spectralTarget,
            EventStatisticsConfiguration config,
            TimeSpan segmentStartOffset)
        {
            var stats = new EventStatistics
            {
                EventStartSeconds      = temporalTarget.Minimum.TotalSeconds,
                EventEndSeconds        = temporalTarget.Maximum.TotalSeconds,
                LowFrequencyHertz      = spectralTarget.Minimum,
                HighFrequencyHertz     = spectralTarget.Maximum,
                SegmentDurationSeconds = recording.Duration.TotalSeconds,
                SegmentStartSeconds    = segmentStartOffset.TotalSeconds,
            };

            // The temporal target is supplied relative to the recording, not to the supplied audio segment,
            // so shift the coordinates to be relative to the segment.
            var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

            if (!recording
                .Duration
                .AsRangeFromZero(Topology.Inclusive)
                .Contains(localTemporalTarget))
            {
                stats.Error        = true;
                stats.ErrorMessage =
                    $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";

                return(stats);
            }

            // convert recording to spectrogram
            int    sampleRate = recording.SampleRate;
            double epsilon    = recording.Epsilon;

            // extract the spectrogram
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

            double hertzBinWidth         = dspOutput1.FreqBinWidth;
            var    stepDurationInSeconds = config.FrameStep / (double)sampleRate;
            var    startFrame            = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

            // subtract 1 frame because we want to end before the start of the end point.
            var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

            var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
            var topBin    = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

            // Events can have their high value set to the nyquist.
            // Since the submatrix call below uses an inclusive upper bound, an index-out-of-bounds exception occurs in
            // these cases, so we just ask for the bin below.
            if (topBin >= config.FrameSize / 2)
            {
                topBin = (config.FrameSize / 2) - 1;
            }

            // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // extract the required acoustic event
            var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

            // Get the SNR of the event. This is just the max value in the matrix because the matrix has already been noise reduced.
            MatrixTools.MinMax(eventMatrix, out _, out double max);
            stats.SnrDecibels = max;

            // Now need to convert event matrix back to energy values before calculating other statistics
            eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

            var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
            var rowAverages    = MatrixTools.GetRowAverages(eventMatrix);

            // calculate the mean and temporal standard deviation in decibels
            NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
            stats.MeanDecibels           = 10 * Math.Log10(mean);
            stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate the frequency standard deviation in decibels
            NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
            stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate relative location of the temporal maximum
            int maxRowId = DataTools.GetMaxIndex(rowAverages);

            stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

            // calculate the entropy dispersion/concentration indices
            stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
            stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

            // calculate the spectral centroid and the dominant frequency
            double binCentroid = CalculateSpectralCentroid(columnAverages);

            stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
            int maxColumnId = DataTools.GetMaxIndex(columnAverages);

            stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;
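            // Note: CalculateSpectralCentroid() is not shown in this example. Conceptually the spectral centroid
            // is the magnitude-weighted mean bin index of the spectrum (illustrative formula, not the actual code):
            //     centroid = sum(i * columnAverages[i]) / sum(columnAverages[i])
            // The code above then converts that bin index to Hertz by multiplying by hertzBinWidth and adding
            // the lower bound of the spectral target.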

            // remainder of this method is to produce debugging images. Can comment out when not debugging.

            /*
             * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
             * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
             * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
             * image4.Save(path4);
             * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
             * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
             * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
             * image4.Save(path4);
             */
            return(stats);
        }
Example #7
0
        /// <summary>
        /// Remove events whose acoustic profile does not match that of a flying fox.
        /// </summary>
        /// <param name="events">unfiltered acoustic events.</param>
        /// <param name="sonogram">includes matrix of spectrogram values.</param>
        /// <returns>filtered acoustic events.</returns>
        private static List <AcousticEvent> FilterEventsForSpectralProfile(List <AcousticEvent> events, BaseSonogram sonogram)
        {
            double[,] spectrogramData = sonogram.Data;

            //int colCount = spectrogramData.GetLength(1);

            // The following freq bins are used to demarcate freq bands for spectral tests below.
            // The hertz values are hard coded but could be included in the config.yml file.
            int maxBin        = (int)Math.Round(8000 / sonogram.FBinWidth);
            int fourKiloHzBin = (int)Math.Round(4000 / sonogram.FBinWidth);
            int oneKiloHzBin  = (int)Math.Round(1000 / sonogram.FBinWidth);

            var filteredEvents = new List <AcousticEvent>();

            foreach (AcousticEvent ae in events)
            {
                int startFrame = ae.Oblong.RowTop;

                //int endFrame = ae.Oblong.RowBottom;

                // get all the frames of the acoustic event
                //var subMatrix = DataTools.Submatrix(spectrogramData, startFrame, 0, endFrame, colCount - 1);

                // get only the frames from centre of the acoustic event
                var subMatrix          = DataTools.Submatrix(spectrogramData, startFrame + 1, 0, startFrame + 4, maxBin);
                var spectrum           = MatrixTools.GetColumnAverages(subMatrix);
                var normalisedSpectrum = DataTools.normalise(spectrum);
                normalisedSpectrum = DataTools.filterMovingAverageOdd(normalisedSpectrum, 11);
                var maxId = DataTools.GetMaxIndex(normalisedSpectrum);

                //var hzMax = (int)Math.Ceiling(maxId * sonogram.FBinWidth);

                // Do TESTS to determine if event has spectrum matching a Flying fox.
                // Test 1: Spectral maximum should be below 4 kHz.
                bool passTest1 = maxId < fourKiloHzBin;

                // Test 2: There should be little energy in 0-1 kHz band.
                var    subband1Khz  = DataTools.Subarray(normalisedSpectrum, 0, oneKiloHzBin);
                double bandArea1    = subband1Khz.Sum();
                double energyRatio1 = bandArea1 / normalisedSpectrum.Sum();

                // 0.125  = 1/8.  i.e. test requires that energy in 0-1kHz band is less than average in all 8 kHz bands
                // 0.0938 = 3/32. i.e. test requires that energy in 0-1kHz band is less than 3/4 average in all 8 kHz bands
                // 0.0625 = 1/16. i.e. test requires that energy in 0-1kHz band is less than half average in all 8 kHz bands
                bool passTest2 = !(energyRatio1 > 0.1);

                // Test 3: There should be little energy in 4-5 kHz band.
                var    subband4Khz  = DataTools.Subarray(normalisedSpectrum, fourKiloHzBin, oneKiloHzBin);
                double bandArea2    = subband4Khz.Sum();
                double energyRatio2 = bandArea2 / normalisedSpectrum.Sum();
                bool   passTest3    = !(energyRatio2 > 0.125);

                // TODO write method to determine similarity of spectrum to a true flying fox spectrum.
                // Problem: it is not certain how variable the FF spectra are.
                // In ten minutes of recording used so far, which include 14-15 obvious calls, there appear to be two spectral types.
                // One type has three peaks at around 1.5 kHz, 3 kHz and 6 kHz.
                // The other type has two peaks around 2.5 and 5.5 kHz.

                //if (passTest1)
                //if (true)
                if (passTest1 && passTest2 && passTest3)
                {
                    filteredEvents.Add(ae);

                    //DEBUG SPECTRAL PROFILES: UNCOMMENT following lines to get spectral profiles of the events.

                    /*
                     * double startSecond = ae.EventStartSeconds - ae.SegmentStartSeconds;
                     * string name = "CallSpectrum " + (ae.SegmentStartSeconds / 60) + "m" + (int)Math.Floor(startSecond) + "s hzMax" + hzMax;
                     * var bmp2 = GraphsAndCharts.DrawGraph(name, normalisedSpectrum, 100);
                     * bmp2.Save(Path.Combine(@"PATH\Towsey.PteropusSpecies", name + ".png"));
                     */
                }
            }

            return(filteredEvents);
        }
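The three spectral tests above share one pattern: take a sub-band of the normalised spectrum and compare its share of the total energy against a threshold. A small helper expressing that pattern is sketched below; it is not part of the original code, and it reuses the DataTools.Subarray call exactly as the method above does.

        // Sketch: fraction of spectral energy lying in the binCount bins starting at startBin.
        private static double BandEnergyRatio(double[] normalisedSpectrum, int startBin, int binCount)
        {
            double bandEnergy  = DataTools.Subarray(normalisedSpectrum, startBin, binCount).Sum();
            double totalEnergy = normalisedSpectrum.Sum();
            return bandEnergy / totalEnergy;
        }

        // e.g. passTest2 above is equivalent to: BandEnergyRatio(normalisedSpectrum, 0, oneKiloHzBin) <= 0.1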
Example #8
0
        public static void SimilarityIndex(double[] channelL, double[] channelR, double epsilon, int sampleRate,
                                           out double similarityIndex, out double decibelIndex,
                                           out double avDecibelBias, out double medianDecibelBias,
                                           out double lowFreqDbBias, out double midFreqDbBias, out double hiFreqDbBias)
        {
            //var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFFTs(subsegmentRecording, frameSize, frameStep);
            int frameSize = 512;
            int frameStep = 512;

            frameSize *= 16; // take longer window to get low freq
            frameStep *= 16;

            var dspOutputL  = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelL, sampleRate, epsilon, frameSize, frameStep);
            var avSpectrumL = MatrixTools.GetColumnAverages(dspOutputL.AmplitudeSpectrogram);

            //var medianSpectrumL = MatrixTools.GetColumnMedians(dspOutputL.amplitudeSpectrogram);

            var dspOutputR  = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(channelR, sampleRate, epsilon, frameSize, frameStep);
            var avSpectrumR = MatrixTools.GetColumnAverages(dspOutputR.AmplitudeSpectrogram);

            //var medianSpectrumR = MatrixTools.GetColumnMedians(dspOutputR.amplitudeSpectrogram);

            similarityIndex = 0.0;
            decibelIndex    = 0.0;
            for (int i = 0; i < avSpectrumR.Length; i++)
            {
                double min = Math.Min(avSpectrumL[i], avSpectrumR[i]);
                double max = Math.Max(avSpectrumL[i], avSpectrumR[i]);
                if (max <= 0.000001)
                {
                    max = 0.000001;  // to prevent division by zero.
                }

                // index = min / max;
                double index = min * min / (max * max);
                similarityIndex += index;

                double dBmin = 20 * Math.Log10(min);
                double dBmax = 20 * Math.Log10(max);
                decibelIndex += dBmax - dBmin;
            }

            similarityIndex /= avSpectrumR.Length;
            decibelIndex    /= avSpectrumR.Length;
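            // After this normalisation, similarityIndex lies in [0, 1] (1 when the two average spectra are identical)
            // and decibelIndex is the mean absolute left/right level difference per frequency bin, in dB.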

            double medianLeft  = Statistics.GetMedian(avSpectrumL);
            double medianRight = Statistics.GetMedian(avSpectrumR);

            medianDecibelBias = medianLeft - medianRight;

            // init values
            avDecibelBias = 0.0;
            lowFreqDbBias = 0.0;

            // calculate the freq band bounds for 2kHz and 7khz.
            int lowBound = frameSize * 2000 / sampleRate;
            int midBound = frameSize * 7000 / sampleRate;
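            // e.g. (assumed values, for illustration): with the 8192-sample frames set above and a 22050 Hz
            // sample rate, lowBound = 8192 * 2000 / 22050 = 743 bins (~2 kHz) and
            // midBound = 8192 * 7000 / 22050 = 2600 bins (~7 kHz), out of 4096 spectral bins.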

            for (int i = 0; i < lowBound; i++)
            {
                double dbLeft  = 20 * Math.Log10(avSpectrumL[i]);
                double dbRight = 20 * Math.Log10(avSpectrumR[i]);
                avDecibelBias += dbLeft - dbRight;
                lowFreqDbBias += dbLeft - dbRight;
            }

            midFreqDbBias = 0.0;
            for (int i = lowBound; i < midBound; i++)
            {
                double dbLeft  = 20 * Math.Log10(avSpectrumL[i]);
                double dbRight = 20 * Math.Log10(avSpectrumR[i]);
                avDecibelBias += dbLeft - dbRight;
                midFreqDbBias += dbLeft - dbRight;
            }

            hiFreqDbBias = 0.0;
            for (int i = midBound; i < avSpectrumR.Length; i++)
            {
                double dbLeft  = 20 * Math.Log10(avSpectrumL[i]);
                double dbRight = 20 * Math.Log10(avSpectrumR[i]);
                avDecibelBias += dbLeft - dbRight;
                hiFreqDbBias  += dbLeft - dbRight;
            }

            avDecibelBias /= avSpectrumR.Length;
            lowFreqDbBias /= lowBound;
            midFreqDbBias /= midBound - lowBound;
            hiFreqDbBias  /= avSpectrumR.Length - midBound;
        }
Example #9
0
        public void PowerSpectrumDensityTest()
        {
            var inputPath                 = @"C:\Users\kholghim\Mahnoosh\Liz\TrainSet\";
            var resultPsdPath             = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD.bmp";
            var resultNoiseReducedPsdPath = @"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\train_LogPSD_NoiseReduced.bmp";

            //var inputPath =Path.Combine(inputDir, "TrainSet"); // directory of the one-min recordings of one day (21 and 23 Apr - Black Rail Data)

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            // get the nyquist value from the first wav file in the folder of recordings
            int           nq            = new AudioRecording(Directory.GetFiles(inputPath, "*.wav")[0]).Nyquist;
            int           nyquist       = nq;  // 11025;
            int           frameSize     = 1024;
            int           finalBinCount = 512; //256; //
            int           hertzInterval = 1000;
            FreqScaleType scaleType     = FreqScaleType.Linear;
            //var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
            //var fst = freqScale.ScaleType;
            //var fst = FreqScaleType.Linear;
            //var freqScale = new FrequencyScale(fst);

            var settings = new SpectrogramSettings()
            {
                WindowSize    = frameSize,
                WindowOverlap = 0.1028,

                DoMelScale  = scaleType == FreqScaleType.Mel,
                MelBinCount = 256,

                //DoMelScale = false,
                //MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,

                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            var attributes = new SpectrogramAttributes()
            {
                NyquistFrequency = nyquist,
                Duration         = TimeSpan.FromMinutes(1440),
            };

            List <double[]> psd = new List <double[]>();

            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);

                    //var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
                    //var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
                    // save the matrix
                    // skip normalisation
                    // skip mel
                    settings.SourceFileName = recording.BaseName;

                    var spectrogram = new EnergySpectrogram(settings, recording.WavReader);
                    //var sonogram = new AmplitudeSpectrogram(settings, recording.WavReader);

                    //var energySpectrogram = new EnergySpectrogram(sonoConfig, amplitudeSpectrogram.Data);
                    //var energySpectrogram = new EnergySpectrogram(sonoConfig, recording.WavReader);
                    //var energySpectrogram = new EnergySpectrogram(settings, recording.WavReader);

                    // square the FFT coefficients to get an energy spectrogram
                    // double[,] energySpectrogram = PowerSpectrumDensity.GetEnergyValues(amplitudeSpectrogram.Data);

                    // RMS NORMALIZATION
                    //double[,] normalizedValues = SNR.RmsNormalization(energySpectro.Data);
                    //energySpectro.Data = SNR.RmsNormalization(energySpectro.Data);

                    // Median Noise Reduction
                    //spectrogram.Data = PcaWhitening.NoiseReduction(spectrogram.Data);
                    //spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data);

                    //double[] psd = PowerSpectralDensity.GetPowerSpectrum(noiseReducedValues);
                    //psd.Add(energySpectro.GetLogPsd());
                    psd.Add(MatrixTools.GetColumnAverages(spectrogram.Data));

                    //psd.Add(SpectrogramTools.CalculateAvgSpectrumFromEnergySpectrogram(normalizedValues));
                    //psd.Add(PowerSpectralDensity.GetPowerSpectrum(normalizedValues));
                }
            }

            // writing psd matrix to csv file
            //Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\psd.csv"), psd.ToArray().ToMatrix());
            //Image imagePsd = DecibelSpectrogram.DrawSpectrogramAnnotated(psd.ToArray().ToMatrix(), settings, attributes);
            //imagePsd.Save(resultPsdPath, ImageFormat.Bmp);
            var psdMatrix = psd.ToArray().ToMatrix();
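            // psdMatrix now has (presumably) one row per input recording and one column per frequency bin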

            // calculate the log of the PSD matrix
            var logPsd = MatrixTools.Matrix2LogValues(psdMatrix);

            Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd.csv"), logPsd);

            var image = DecibelSpectrogram.DrawSpectrogramAnnotated(logPsd, settings, attributes);

            image.Save(resultPsdPath);

            var noiseReducedLogPsd = PcaWhitening.NoiseReduction(logPsd); //SNR.NoiseReduce_Standard(logPsd); //SNR.NoiseReduce_Mean(logPsd, 0.0);//SNR.NoiseReduce_Median(logPsd, 0.0); //

            Csv.WriteMatrixToCsv(new FileInfo(@"C:\Users\kholghim\Mahnoosh\Liz\PowerSpectrumDensity\logPsd_NoiseReduced.csv"), noiseReducedLogPsd);

            var image2 = DecibelSpectrogram.DrawSpectrogramAnnotated(noiseReducedLogPsd, settings, attributes);

            image2.Save(resultNoiseReducedPsdPath);

            //ImageTools.DrawMatrix(psd.ToArray().ToMatrix(), resultPath);
            //ImageTools.DrawReversedMatrix(psd.ToArray().ToMatrix(), resultPath);
            //var data = MatrixTools.Matrix2LogValues(psd.ToArray().ToMatrix());
            //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(data);
            //Image image = ImageTools.DrawReversedMatrixWithoutNormalisation(logPsd);
        }