Exemple #1
0
        }//Execute

        /// <summary>
        /// Scans each candidate segment of the sonogram for frames that resemble the target template,
        /// using cross-correlation of unit-length cepstral (MFCC) vectors.
        /// Returns a per-frame score array wrapped in a Tuple, or null when no segments were supplied.
        /// </summary>
        public static Tuple <double[]> Execute_MFCC_XCOR(double[,] target, double dynamicRange, SpectrogramStandard sonogram,
                                                         List <AcousticEvent> segments, int minHz, int maxHz, double minDuration)
        {
            Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
            if (segments == null)
            {
                return null;
            }

            // Convert the Hz band limits to spectrogram bin indices.
            int bottomBin = (int)(minHz / sonogram.FBinWidth);
            int topBin = (int)(maxHz / sonogram.FBinWidth);
            int templateFrameCount = target.GetLength(0);

            // Cosine coefficient matrix for the cepstral transform; only the first 12 coefficients are used.
            const int cepstralCoeffCount = 12;
            int filterCount = target.GetLength(1); // number of filters in filter bank
            double[,] cosines = MFCCStuff.Cosines(filterCount, cepstralCoeffCount + 1);

            // Compress the template to the user-set dynamic range, take its cepstrum,
            // and flatten it to a unit-length vector ready for cross-correlation.
            target = SNR.SetDynamicRange(target, 3.0, dynamicRange);
            target = MFCCStuff.Cepstra(target, cepstralCoeffCount, cosines);
            double[] templateVector = DataTools.normalise2UnitLength(DataTools.Matrix2Array(target));

            // Debug output: render the processed template to disk.
            string imagePath2 = @"C:\SensorNetworks\Output\FELT_Currawong\target.png";
            var result1 = BaseSonogram.Data2ImageData(target);
            ImageTools.DrawMatrix(result1.Item1, 1, 1, imagePath2);

            double[] scores = new double[sonogram.FrameCount];
            foreach (AcousticEvent segment in segments)
            {
                Log.WriteLine("SEARCHING SEGMENT.");
                int firstRow = (int)Math.Round(segment.TimeStart * sonogram.FramesPerSecond);
                int lastRow = (int)Math.Round(segment.TimeEnd * sonogram.FramesPerSecond);
                if (lastRow >= sonogram.FrameCount)
                {
                    lastRow = sonogram.FrameCount - 1;
                }

                // Pull the end back so the template window fits, but keep at least one row to scan.
                lastRow -= templateFrameCount;
                if (lastRow <= firstRow)
                {
                    lastRow = firstRow + 1;
                }

                for (int row = firstRow; row < lastRow; row++)
                {
                    // Extract a window of the sonogram and process it exactly like the template.
                    double[,] window = DataTools.Submatrix(sonogram.Data, row, bottomBin, row + templateFrameCount - 1, topBin);
                    window = SNR.SetDynamicRange(window, 3.0, dynamicRange);
                    window = MFCCStuff.Cepstra(window, cepstralCoeffCount, cosines);

                    double[] windowVector = DataTools.normalise2UnitLength(DataTools.Matrix2Array(window));

                    // Dot product of two unit-length vectors = cosine similarity.
                    scores[row] = DataTools.DotProduct(templateVector, windowVector);
                }
            }

            return Tuple.Create(scores);
        }//Execute
 /// <summary>
 /// Appends the SQL form of <paramref name="value"/> to <paramref name="stringBuilder"/>,
 /// delegating to the shared DataTools helper with "||" as the SQL concatenation operator.
 /// </summary>
 static void ConvertStringToSql(StringBuilder stringBuilder, string value)
 {
     const string concatOperator = "||";
     DataTools.ConvertStringToSql(stringBuilder, concatOperator, null, AppendConversionAction, value, null);
 }
        /// <summary>
        /// Calculate summary statistics for supplied temporal and spectral targets.
        /// </summary>
        /// <remarks>
        /// The acoustic statistics calculated in this method are based on methods outlined in
        /// "Acoustic classification of multiple simultaneous bird species: A multi-instance multi-label approach",
        /// by Forrest Briggs, Balaji Lakshminarayanan, Lawrence Neal, Xiaoli Z.Fern, Raviv Raich, Sarah J.K.Hadley, Adam S. Hadley, Matthew G. Betts, et al.
        /// The Journal of the Acoustical Society of America v131, pp4640 (2012); doi: http://dx.doi.org/10.1121/1.4707424
        /// ..
        /// The Briggs feature are calculated from the column (freq bin) and row (frame) sums of the extracted spectrogram.
        /// 1. Gini Index for frame and bin sums. A measure of dispersion. Problem with gini is that its value is dependent on the row or column count.
        ///    We use entropy instead because value not dependent on row or column count because it is normalized.
        /// For the following measures of k-central moments, the freq and time values are normalized in 0,1 to width of the event.
        /// 2. freq-mean
        /// 3. freq-variance
        /// 4. freq-skew and kurtosis
        /// 5. time-mean
        /// 6. time-variance
        /// 7. time-skew and kurtosis
        /// 8. freq-max (normalized)
        /// 9. time-max (normalized)
        /// 10. Briggs et al also calculate a 16 value histogram of gradients for each event mask. We do not do that here although we could.
        /// ...
        /// NOTE 1: There are differences between our method of noise reduction and Briggs. Briggs does not convert to decibels
        /// and instead works with power values. He obtains a noise profile from the 20% of frames having the lowest energy sum.
        /// NOTE 2: To NormaliseMatrixValues for noise, they divide the actual energy by the noise value. This is equivalent to subtraction when working in decibels.
        ///         There are advantages and disadvantages to Briggs method versus ours. In our case, we have to convert decibel values back to
        ///         energy values when calculating the statistics for the extracted acoustic event.
        /// NOTE 3: We do not calculate the higher central moments of the time/frequency profiles, i.e. skew and kurtosis.
        ///         Only mean and standard deviation.
        /// ..
        /// NOTE 4: This method assumes that the passed event occurs totally within the passed recording,
        /// AND that the passed recording is of sufficient duration to obtain reliable BGN noise profile
        /// BUT not so long as to cause memory constipation.
        /// </remarks>
        /// <param name="recording">as type AudioRecording which contains the event.</param>
        /// <param name="temporalTarget">Both start and end bounds - relative to the supplied recording.</param>
        /// <param name="spectralTarget">both bottom and top bounds in Hertz.</param>
        /// <param name="config">parameters that determine the outcome of the analysis.</param>
        /// <param name="segmentStartOffset">How long since the start of the recording this event occurred.</param>
        /// <returns>an instance of EventStatistics.</returns>
        public static EventStatistics AnalyzeAudioEvent(
            AudioRecording recording,
            Interval <TimeSpan> temporalTarget,
            Interval <double> spectralTarget,
            EventStatisticsConfiguration config,
            TimeSpan segmentStartOffset)
        {
            // Record the event's bounds and the segment's position before any analysis starts,
            // so these fields are populated even on the early error return below.
            var stats = new EventStatistics
            {
                EventStartSeconds      = temporalTarget.Minimum.TotalSeconds,
                EventEndSeconds        = temporalTarget.Maximum.TotalSeconds,
                LowFrequencyHertz      = spectralTarget.Minimum,
                HighFrequencyHertz     = spectralTarget.Maximum,
                SegmentDurationSeconds = recording.Duration.TotalSeconds,
                SegmentStartSeconds    = segmentStartOffset.TotalSeconds,
            };

            // temporal target is supplied relative to recording, but not the supplied audio segment
            // shift coordinates relative to segment
            var localTemporalTarget = temporalTarget.Shift(-segmentStartOffset);

            // Fail fast (error flag, no exception) if the event does not fit inside the audio we were given.
            if (!recording
                .Duration
                .AsIntervalFromZero(Topology.Inclusive)
                .Contains(localTemporalTarget))
            {
                stats.Error        = true;
                stats.ErrorMessage =
                    $"Audio not long enough ({recording.Duration}) to analyze target ({localTemporalTarget})";

                return(stats);
            }

            // convert recording to spectrogram
            int    sampleRate = recording.SampleRate;
            double epsilon    = recording.Epsilon;

            // extract the spectrogram
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, config.FrameSize, config.FrameStep);

            double hertzBinWidth         = dspOutput1.FreqBinWidth;
            var    stepDurationInSeconds = config.FrameStep / (double)sampleRate;

            // Ceiling/Floor ensure the selected frame range lies strictly inside the target interval.
            var    startFrame            = (int)Math.Ceiling(localTemporalTarget.Minimum.TotalSeconds / stepDurationInSeconds);

            // subtract 1 frame because want to end before start of end point.
            var endFrame = (int)Math.Floor(localTemporalTarget.Maximum.TotalSeconds / stepDurationInSeconds) - 1;

            var bottomBin = (int)Math.Floor(spectralTarget.Minimum / hertzBinWidth);
            var topBin    = (int)Math.Ceiling(spectralTarget.Maximum / hertzBinWidth);

            // Events can have their high value set to the nyquist.
            // Since the submatrix call below uses an inclusive upper bound an index out of bounds exception occurs in
            // these cases. So we just ask for the bin below.
            if (topBin >= config.FrameSize / 2)
            {
                topBin = (config.FrameSize / 2) - 1;
            }

            // Convert amplitude spectrogram to deciBels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);

            // NOTE(review): assumed to be one background-noise dB value per frequency bin, given its use
            // in the per-bin truncation below — confirm against NoiseProfile.CalculateBackgroundNoise.
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // extract the required acoustic event
            var eventMatrix = MatrixTools.Submatrix(decibelSpectrogram, startFrame, bottomBin, endFrame, topBin);

            // Get the SNR of the event. This is just the max value in the matrix because noise reduced
            MatrixTools.MinMax(eventMatrix, out _, out double max);
            stats.SnrDecibels = max;

            // Now need to convert event matrix back to energy values before calculating other statistics
            eventMatrix = MatrixTools.Decibels2Power(eventMatrix);

            var columnAverages = MatrixTools.GetColumnAverages(eventMatrix);
            var rowAverages    = MatrixTools.GetRowAverages(eventMatrix);

            // calculate the mean and temporal standard deviation in decibels
            // NOTE(review): Math.Log10 returns -Infinity for a zero argument, so this assumes the
            // extracted event contains non-zero energy — confirm upstream guarantees.
            NormalDist.AverageAndSD(rowAverages, out double mean, out double stddev);
            stats.MeanDecibels           = 10 * Math.Log10(mean);
            stats.TemporalStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate the frequency standard deviation in decibels
            NormalDist.AverageAndSD(columnAverages, out mean, out stddev);
            stats.FreqBinStdDevDecibels = 10 * Math.Log10(stddev);

            // calculate relative location of the temporal maximum
            int maxRowId = DataTools.GetMaxIndex(rowAverages);

            stats.TemporalMaxRelative = maxRowId / (double)rowAverages.Length;

            // calculate the entropy dispersion/concentration indices
            // (1 - normalized entropy, so higher = energy more concentrated)
            stats.TemporalEnergyDistribution = 1 - DataTools.EntropyNormalised(rowAverages);
            stats.SpectralEnergyDistribution = 1 - DataTools.EntropyNormalised(columnAverages);

            // calculate the spectral centroid and the dominant frequency
            // (bin indices are converted back to Hz and offset by the band's lower bound)
            double binCentroid = CalculateSpectralCentroid(columnAverages);

            stats.SpectralCentroid = (int)Math.Round(hertzBinWidth * binCentroid) + (int)spectralTarget.Minimum;
            int maxColumnId = DataTools.GetMaxIndex(columnAverages);

            stats.DominantFrequency = (int)Math.Round(hertzBinWidth * maxColumnId) + (int)spectralTarget.Minimum;

            // remainder of this method is to produce debugging images. Can comment out when not debugging.

            /*
             * var normalisedIndex = DataTools.NormaliseMatrixValues(columnAverages);
             * var image4 = GraphsAndCharts.DrawGraph("columnSums", normalisedIndex, 100);
             * string path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\columnSums.png";
             * image4.Save(path4);
             * normalisedIndex = DataTools.NormaliseMatrixValues(rowAverages);
             * image4 = GraphsAndCharts.DrawGraph("rowSums", normalisedIndex, 100);
             * path4 = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\rowSums.png";
             * image4.Save(path4);
             */
            return(stats);
        }
Exemple #4
0
        /// <summary>
        /// Implements the "Adaptive Level Equalisation" algorithm of Lamel et al, 1981 - with modifications for our signals.
        /// Units are assumed to be decibels.
        /// Returns the min and max frame dB AND the estimate MODAL or BACKGROUND noise for the signal array
        /// IF This modal noise is subtracted from each frame dB, the effect is to set set average background noise level = 0 dB.
        /// The algorithm is described in Lamel et al, 1981.
        /// USED TO SEGMENT A RECORDING INTO SILENCE AND VOCALISATION
        /// NOTE: noiseThreshold is passed as decibels. Original Lamel algorithm ONLY SEARCHES in range min to 10dB above min.
        ///
        /// This method debugged on 7 Aug 2018 using following command line arguments:
        /// audio2csv Y:\TheNatureConservency\Myanmar\20180517\site112\2018_02_14_Bar5\20180214_Bar5\20180214_101121_Bar5.wav Towsey.Acoustic.yml C:\Temp... -m True
        /// </summary>
        /// <param name="dBarray">signal in decibel values</param>
        /// <param name="minDb">minimum value in the passed array of decibel values</param>
        /// <param name="maxDb">maximum value in the passed array of decibel values</param>
        /// <param name="modeNoise">modal or background noise in decibels</param>
        /// <param name="sdNoise">estimated sd of the noise - assuming noise to be gaussian</param>
        public static void CalculateNoiseUsingLamelsAlgorithm(
            double[] dBarray,
            out double minDb,
            out double maxDb,
            out double modeNoise,
            out double sdNoise)
        {
            // set constants
            double noiseThreshold_DB = 10.0; // dB: only search in range [min, min + 10dB] per Lamel
            var    binCount          = 100;  // number of bins for histogram is FIXED
            double histogramBinWidth = noiseThreshold_DB / binCount;

            //ignore first N and last N frames when calculating background noise level because
            // sometimes these frames have atypically low signal values
            int buffer = 20; //ignore first N and last N frames when calculating background noise level

            //HOWEVER do not ignore them for short recordings!
            int arrayLength = dBarray.Length;

            if (arrayLength < 1000)
            {
                buffer = 0; //ie recording is < approx 11 seconds long
            }

            double min = double.MaxValue;
            double max = -double.MaxValue;

            for (int i = buffer; i < arrayLength - buffer; i++)
            {
                // BUG FIX: these two tests were previously chained with "else if", so any element
                // that updated min was excluded from the max test. For a monotonically decreasing
                // array (each element a new min) max was never updated and stayed at -double.MaxValue.
                // The tests must be independent.
                if (dBarray[i] < min)
                {
                    min = dBarray[i];
                }

                if (dBarray[i] > max)
                {
                    max = dBarray[i];
                }
            }

            // clamp the minimum to the environmental noise floor
            if (min <= SNR.MinimumDbBoundForEnvironmentalNoise)
            {
                min = SNR.MinimumDbBoundForEnvironmentalNoise;
            }

            // return the outs!
            minDb = min;
            maxDb = max;

            // histogram the low-energy frames: only values within noiseThreshold_DB of the minimum
            var histo        = new int[binCount];
            var absThreshold = minDb + noiseThreshold_DB;

            for (var i = 0; i < arrayLength; i++)
            {
                if (dBarray[i] <= absThreshold)
                {
                    // clamp the bin index into [0, binCount-1]; values below the clamped minDb map to bin 0
                    var id = (int)((dBarray[i] - minDb) / histogramBinWidth);
                    if (id >= binCount)
                    {
                        id = binCount - 1;
                    }
                    else if (id < 0)
                    {
                        id = 0;
                    }

                    histo[id]++;
                }
            }

            // smooth before peak-picking to avoid spurious modes
            var smoothHisto = DataTools.filterMovingAverage(histo, 3);

            //DataTools.writeBarGraph(histo);

            // find peak of lowBins histogram
            SNR.GetModeAndOneStandardDeviation(smoothHisto, out var indexOfMode, out var indexOfOneSd);

            // return remaining outs!
            modeNoise = min + ((indexOfMode + 1) * histogramBinWidth);    // modal noise level
            sdNoise   = (indexOfMode - indexOfOneSd) * histogramBinWidth; // SD of the noise
        }
        /// <summary>
        /// This method rearranges the content of a false-colour spectrogram according to the acoustic cluster or acoustic state to which each minute belongs.
        /// The time scale is added in afterwards - must overwrite the previous time scale and title bar.
        /// This method was written to examine the cluster content of recordings analysed by Mangalam using a 10x10 SOM.
        /// The output image was used in the paper presented by Michael Towsey to Ecoacoustics Congress 2016, at Michigan State University.
        /// </summary>
        public static void ExtractSOMClusters2()
        {
            string opDir       = @"C:\SensorNetworks\Output\Mangalam_EcoAcCongress2016\";
            string clusterFile = opDir + "Minute_cluster mapping - all.csv";

            //string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_EcoAcCongress2016\SERF Spectrogram SW 2010Oct14.png";
            string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_EcoAcCongress2016\SERF Spectrogram NW 2010Oct14.png";
            string fileStem       = "NW_14Oct";

            //string fileStem = "SW_14Oct";
            string opFileName = fileStem + ".SOM27AcousticClusters.png";
            string title      = string.Format("SOM CLUSTERS of ACOUSTIC INDICES: recording {0}", fileStem);

            int        clusterCount = 27; // from Yvonne's method
            List <Pen> pens         = ImageTools.GetColorPalette(clusterCount);
            Pen        whitePen     = new Pen(Color.White);
            Font       stringFont   = new Font("Arial", 12, FontStyle.Bold);

            // assignment of cluster numbers to cluster LABEL
            string[] clusterLabel = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a" };

            // read the data file containing cluster sequence and find the row for this recording
            List <string> lines = FileTools.ReadTextFile(clusterFile);

            string[] words = null;
            for (int i = 0; i < lines.Count; i++)
            {
                if (lines[i].StartsWith(fileStem))
                {
                    words = lines[i].Split(',');
                    break;
                }
            }

            // BUG FIX: previously a missing data row left words null and caused a
            // NullReferenceException in the histogram loop below.
            if (words == null)
            {
                Console.WriteLine("\n\n >>>>>>>> NO CLUSTER DATA FOUND FOR FILE STEM: " + fileStem);
                return;
            }

            // init histogram to accumulate the cluster counts
            int[] clusterHistogram = new int[clusterCount];

            // init array of lists to know what minutes are assigned to what clusters.
            List <int>[] clusterArrays = new List <int> [clusterCount];
            for (int i = 0; i < clusterCount; i++)
            {
                clusterArrays[i] = new List <int>();
            }

            // construct cluster histogram and arrays (first word is the file stem, so start at index 1)
            for (int w = 1; w < words.Length; w++)
            {
                int clusterID = int.Parse(words[w]);
                clusterHistogram[clusterID - 1]++;
                clusterArrays[clusterID - 1].Add(w);
            }

            // ranks cluster counts in descending order
            Tuple <int[], int[]> tuple = DataTools.SortArray(clusterHistogram);

            int[] sortOrder = tuple.Item1;

            //read in the image
            FileInfo fi = new FileInfo(inputImagePath);

            if (!fi.Exists)
            {
                Console.WriteLine("\n\n >>>>>>>> FILE DOES NOT EXIST >>>>>>: " + fi.Name);

                // BUG FIX: previously execution fell through and crashed attempting to read the missing file.
                return;
            }

            Console.WriteLine("Reading file: " + fi.Name);
            Bitmap ipImage    = ImageTools.ReadImage2Bitmap(fi.FullName);
            int    imageWidth = ipImage.Width;
            int    imageHt    = ipImage.Height;

            //init the output image: two extra separator columns per cluster
            int      opImageWidth = imageWidth + (2 * clusterCount);
            Image    opImage      = new Bitmap(opImageWidth, imageHt);
            Graphics gr           = Graphics.FromImage(opImage);

            gr.Clear(Color.Black);

            // copy the spectrogram columns across cluster-by-cluster, in descending order of cluster size
            int opColumnNumber     = 0;
            int clusterStartColumn = 0;

            for (int id = 0; id < clusterCount; id++)
            {
                int sortID = sortOrder[id];

                Console.WriteLine("Reading CLUSTER: " + (sortID + 1) + "  Label=" + clusterLabel[sortID]);
                int[] minutesArray = clusterArrays[sortID].ToArray();
                clusterStartColumn = opColumnNumber;

                // copy every one-pixel-wide minute column belonging to this cluster
                for (int m = 0; m < minutesArray.Length; m++)
                {
                    Rectangle rectangle = new Rectangle(minutesArray[m] - 1, 0, 1, imageHt);
                    using (Bitmap column = ipImage.Clone(rectangle, ipImage.PixelFormat))
                    {
                        gr.DrawImage(column, opColumnNumber, 0);
                    }

                    opColumnNumber++;
                }

                // draw in two white separator lines between clusters
                gr.DrawLine(whitePen, opColumnNumber, 0, opColumnNumber, imageHt - 1);
                opColumnNumber++;
                gr.DrawLine(whitePen, opColumnNumber, 0, opColumnNumber, imageHt - 1);
                opColumnNumber++;

                // draw Cluster ID at bottom of the image, but only when the cluster is wide enough to be legible
                if (minutesArray.Length > 3)
                {
                    // black backing strip for the label
                    using (Bitmap clusterIDImage = new Bitmap(minutesArray.Length, SpectrogramConstants.HEIGHT_OF_TITLE_BAR - 6))
                    {
                        using (Graphics g2 = Graphics.FromImage(clusterIDImage))
                        {
                            g2.Clear(Color.Black);
                        }

                        gr.DrawImage(clusterIDImage, clusterStartColumn, imageHt - 19);
                    }

                    int location = opColumnNumber - ((opColumnNumber - clusterStartColumn) / 2);
                    gr.DrawString(clusterLabel[sortID], stringFont, Brushes.White, new PointF(location - 10, imageHt - 19));
                }
            }

            //Draw the title bar
            Image titleBar = DrawTitleBarOfClusterSpectrogram(title, opImageWidth - 2);

            gr.DrawImage(titleBar, 1, 0);
            opImage.Save(Path.Combine(opDir, opFileName));

            // release GDI resources (previously leaked)
            titleBar.Dispose();
            gr.Dispose();
            opImage.Dispose();
            ipImage.Dispose();
            whitePen.Dispose();
            stringFont.Dispose();
        }
        public static double[] CalculateScores(double[] subBandSpectrum, int windowWidth)
        {
            double[] scores = { 0, 0, 0 };

            //TEST ONE

            /*
             * double totalAreaUnderSpectrum = subBandSpectrum.Sum();
             * double areaUnderLowest24bins = 0.0;
             * for (int i = 0; i < 24; i++)
             * {
             *  areaUnderLowest24bins += subBandSpectrum[i];
             * }
             * double areaUnderHighBins = totalAreaUnderSpectrum - areaUnderLowest24bins;
             * double areaUnderBins4to7 = 0.0;
             * for (int i = 4; i < 7; i++)
             * {
             *  areaUnderBins4to7 += subBandSpectrum[i];
             * }
             * double ratio1 = areaUnderBins4to7 / areaUnderLowest24bins;
             *
             * double areaUnderBins38to72 = 0.0;
             * for (int i = 38; i < 44; i++)
             * {
             *  areaUnderBins38to72 += subBandSpectrum[i];
             * }
             * for (int i = 52; i < 57; i++)
             * {
             *  areaUnderBins38to72 += subBandSpectrum[i];
             * }
             * for (int i = 64; i < 72; i++)
             * {
             *  areaUnderBins38to72 += subBandSpectrum[i];
             * }
             * double ratio2 = areaUnderBins38to72 / areaUnderHighBins;
             * double score = (ratio1 * 0.2) + (ratio2 * 0.8);
             * double[] truePositives = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0006, 0.0014, 0.0015, 0.0010, 0.0002, 0.0001, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0003, 0.0005, 0.0006, 0.0005, 0.0003, 0.0002, 0.0001, 0.0002, 0.0007, 0.0016, 0.0026, 0.0035, 0.0037, 0.0040, 0.0046, 0.0040, 0.0031, 0.0022, 0.0048, 0.0133, 0.0149, 0.0396, 0.1013, 0.1647, 0.2013, 0.2236, 0.2295, 0.1836, 0.1083, 0.0807, 0.0776, 0.0964, 0.1116, 0.0987, 0.1065, 0.1575, 0.3312, 0.4829, 0.5679, 0.5523, 0.4412, 0.2895, 0.2022, 0.2622, 0.2670, 0.2355, 0.1969, 0.2220, 0.6600, 0.9023, 1.0000, 0.8099, 0.8451, 0.8210, 0.5511, 0.1756, 0.0319, 0.0769, 0.0738, 0.2235, 0.3901, 0.4565, 0.4851, 0.3703, 0.3643, 0.2497, 0.2705, 0.3456, 0.3096, 0.1809, 0.0710, 0.0828, 0.0857, 0.0953, 0.1308, 0.1387, 0.0590 };
             *
             * if (score > 0.4)
             *  eventFound = true;
             * if ((areaUnderHighBins/3) < areaUnderLowest24bins)
             * //if (ratio1 > ratio2)
             * {
             *  eventFound = false;
             * }
             */

            // TEST TWO (A)
            // these are used for scoring
            //double[] truePositives1 = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0006, 0.0014, 0.0015, 0.0010, 0.0002, 0.0001, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0003, 0.0005, 0.0006, 0.0005, 0.0003, 0.0002, 0.0001, 0.0002, 0.0007, 0.0016, 0.0026, 0.0035, 0.0037, 0.0040, 0.0046, 0.0040, 0.0031, 0.0022, 0.0048, 0.0133, 0.0149, 0.0396, 0.1013, 0.1647, 0.2013, 0.2236, 0.2295, 0.1836, 0.1083, 0.0807, 0.0776, 0.0964, 0.1116, 0.0987, 0.1065, 0.1575, 0.3312, 0.4829, 0.5679, 0.5523, 0.4412, 0.2895, 0.2022, 0.2622, 0.2670, 0.2355, 0.1969, 0.2220, 0.6600, 0.9023, 1.0000, 0.8099, 0.8451, 0.8210, 0.5511, 0.1756, 0.0319, 0.0769, 0.0738, 0.2235, 0.3901, 0.4565, 0.4851, 0.3703, 0.3643, 0.2497, 0.2705, 0.3456, 0.3096, 0.1809, 0.0710, 0.0828, 0.0857, 0.0953, 0.1308, 0.1387, 0.0590 };
            //double[] truePositives2 = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0001, 0.0001, 0.0001, 0.0001, 0.0000, 0.0000, 0.0001, 0.0001, 0.0003, 0.0004, 0.0004, 0.0002, 0.0001, 0.0001, 0.0003, 0.0003, 0.0006, 0.0007, 0.0020, 0.0127, 0.0256, 0.0426, 0.0512, 0.0560, 0.0414, 0.0237, 0.0133, 0.0107, 0.0091, 0.0077, 0.0085, 0.0165, 0.0144, 0.0308, 0.0416, 0.0454, 0.0341, 0.0191, 0.0128, 0.0058, 0.0026, 0.0081, 0.0139, 0.0313, 0.0404, 0.0493, 0.0610, 0.1951, 0.4083, 0.5616, 0.5711, 0.5096, 0.4020, 0.2917, 0.1579, 0.1421, 0.1461, 0.1406, 0.2098, 0.1676, 0.2758, 0.2875, 0.6513, 0.9374, 1.0000, 0.7576, 0.4130, 0.2622, 0.1495, 0.0973, 0.0623, 0.0425, 0.0205, 0.0034, 0.0065, 0.0054, 0.0089, 0.0138, 0.0208, 0.0204, 0.0168, 0.0136, 0.0149, 0.0155, 0.0106, 0.0086, 0.0099, 0.0187 };
            //double[] truePositivesA = NormalDist.Convert2ZScores(truePositivesA);
            //double[] truePositivesB = NormalDist.Convert2ZScores(truePositivesB);

            // TEST TWO (B)
            // Use these spectra when using my filtering (i.e. not Chris's prefiltered)
            // these spectra are used for scoring when the window size is 2048
            //double[] truePositives1 = { 0.0014, 0.0012, 0.0009, 0.0003, 0.0001, 0.0005, 0.0008, 0.0029, 0.0057, 0.0070, 0.0069, 0.0063, 0.0053, 0.0032, 0.0013, 0.0011, 0.0011, 0.0007, 0.0000, 0.0006, 0.0010, 0.0013, 0.0008, 0.0009, 0.0022, 0.0046, 0.0069, 0.0082, 0.0070, 0.0065, 0.0082, 0.0078, 0.0052, 0.0021, 0.0132, 0.0357, 0.0420, 0.0996, 0.2724, 0.4557, 0.5739, 0.6366, 0.6155, 0.4598, 0.2334, 0.1468, 0.1410, 0.1759, 0.2157, 0.1988, 0.2131, 0.3072, 0.6161, 0.8864, 1.0000, 0.9290, 0.6983, 0.4208, 0.2690, 0.3190, 0.3109, 0.2605, 0.1896, 0.2118, 0.5961, 0.8298, 0.9290, 0.7363, 0.6605, 0.5840, 0.3576, 0.1019, 0.0162, 0.0400, 0.0405, 0.1106, 0.1803, 0.2083, 0.2058, 0.1475, 0.1387, 0.0870, 0.0804, 0.0975, 0.0848, 0.0490, 0.0193, 0.0217, 0.0210, 0.0214, 0.0253, 0.0254, 0.0072 };
            //double[] truePositives2 = { 0.0090, 0.0106, 0.0138, 0.0134, 0.0088, 0.0026, 0.0002, 0.0002, 0.0003, 0.0000, 0.0001, 0.0006, 0.0013, 0.0019, 0.0020, 0.0015, 0.0008, 0.0004, 0.0002, 0.0015, 0.0022, 0.0073, 0.0195, 0.0628, 0.2203, 0.4031, 0.5635, 0.5445, 0.4828, 0.2869, 0.1498, 0.0588, 0.0500, 0.0542, 0.0641, 0.1188, 0.1833, 0.1841, 0.2684, 0.3062, 0.2831, 0.1643, 0.0606, 0.0336, 0.0136, 0.0056, 0.0187, 0.0301, 0.0700, 0.1103, 0.1559, 0.2449, 0.5303, 0.8544, 1.0000, 0.8361, 0.6702, 0.4839, 0.3463, 0.1525, 0.1049, 0.1201, 0.1242, 0.2056, 0.1653, 0.2685, 0.2947, 0.5729, 0.7024, 0.6916, 0.4765, 0.2488, 0.1283, 0.0543, 0.0326, 0.0236, 0.0187, 0.0108, 0.0021, 0.0028, 0.0019, 0.0024, 0.0041, 0.0063, 0.0066, 0.0055, 0.0036, 0.0025, 0.0018, 0.0014, 0.0013, 0.0008, 0.0010 };
            // these spectra are used for scoring when the window size is 1024
            double[] truePositives1 = { 0.0007, 0.0004, 0.0000, 0.0025, 0.0059, 0.0069, 0.0044, 0.0012, 0.0001, 0.0006, 0.0013, 0.0032, 0.0063, 0.0067, 0.0070, 0.0033, 0.0086, 0.0128, 0.1546, 0.4550, 0.6197, 0.4904, 0.2075, 0.0714, 0.1171, 0.4654, 0.8634, 1.0000, 0.7099, 0.2960, 0.1335, 0.3526, 0.6966, 0.9215, 0.6628, 0.3047, 0.0543, 0.0602, 0.0931, 0.1364, 0.1314, 0.1047, 0.0605, 0.0204, 0.0128, 0.0114 };
            double[] truePositives2 = { 0.0126, 0.0087, 0.0043, 0.0002, 0.0000, 0.0010, 0.0018, 0.0016, 0.0005, 0.0002, 0.0050, 0.1262, 0.4054, 0.5111, 0.3937, 0.1196, 0.0156, 0.0136, 0.0840, 0.1598, 0.1691, 0.0967, 0.0171, 0.0152, 0.0234, 0.3648, 0.8243, 1.0000, 0.6727, 0.2155, 0.0336, 0.0240, 0.2661, 0.6240, 0.7523, 0.5098, 0.1493, 0.0149, 0.0046, 0.0020, 0.0037, 0.0061, 0.0061, 0.0036, 0.0010, 0.0008 };

            var    zscores          = NormalDist.Convert2ZScores(subBandSpectrum);
            double correlationScore = 0.0;
            double score1           = AutoAndCrossCorrelation.CorrelationCoefficient(zscores, truePositives1);
            double score2           = AutoAndCrossCorrelation.CorrelationCoefficient(zscores, truePositives2);

            correlationScore = score1;
            if (score2 > correlationScore)
            {
                correlationScore = score2;
            }

            // TEST THREE: sharpness and height of peaks
            // score the four heighest peaks
            double peaksScore = 0;

            double[] spectrumCopy = new double[subBandSpectrum.Length];
            for (int i = 0; i < subBandSpectrum.Length; i++)
            {
                spectrumCopy[i] = subBandSpectrum[i];
            }

            // set spectrum bounds
            int lowerBound = subBandSpectrum.Length / 4;
            int upperBound = subBandSpectrum.Length * 7 / 8;

            for (int p = 0; p < 4; p++)
            {
                int peakLocation = DataTools.GetMaxIndex(spectrumCopy);
                if (peakLocation < lowerBound)
                {
                    continue; // peak location cannot be too low
                }

                if (peakLocation > upperBound)
                {
                    continue; // peak location cannot be too high
                }

                double peakHeight = spectrumCopy[peakLocation];
                int    nh         = 3;
                if (windowWidth == 2048)
                {
                    nh = 6;
                }

                double peakSides = (subBandSpectrum[peakLocation - nh] + subBandSpectrum[peakLocation + nh]) / 2;
                peaksScore += peakHeight - peakSides;

                //now zero peak and peak neighbourhood
                if (windowWidth == 2048)
                {
                    nh = 9;
                }

                for (int n = 0; n < nh; n++)
                {
                    spectrumCopy[peakLocation + n] = 0;
                    spectrumCopy[peakLocation - n] = 0;
                }
            } // for 4 peaks

            // take average of four peaks
            peaksScore /= 4;

            // TEST FOUR: peak position ratios
            //
            //int[] peakLocationCentres = { 3, 10, 37, 44, 54, 67 };
            int[] peakLocationCentres = { 2, 5, 19, 22, 27, 33 };

            int nh2 = 6;

            if (windowWidth == 1024)
            {
                nh2 = 3;
            }

            int[]    actualPeakLocations = new int[6];
            double[] relativePeakHeights = new double[6];
            for (int p = 0; p < 6; p++)
            {
                double max   = -double.MaxValue;
                int    maxId = peakLocationCentres[p];
                for (int id = peakLocationCentres[p] - 4; id < peakLocationCentres[p] + 4; id++)
                {
                    if (id < 0)
                    {
                        id = 0;
                    }

                    if (subBandSpectrum[id] > max)
                    {
                        max   = subBandSpectrum[id];
                        maxId = id;
                    }
                }

                actualPeakLocations[p] = maxId;
                int lowerPosition = maxId - nh2;
                if (lowerPosition < 0)
                {
                    lowerPosition = 0;
                }

                relativePeakHeights[p] = subBandSpectrum[maxId] - subBandSpectrum[lowerPosition] - subBandSpectrum[maxId + nh2];
            }

            double[] targetHeights     = { 0.1, 0.1, 0.5, 0.5, 1.0, 0.6 };
            var      zscores1          = NormalDist.Convert2ZScores(relativePeakHeights);
            var      zscores2          = NormalDist.Convert2ZScores(targetHeights);
            double   relativePeakScore = AutoAndCrossCorrelation.CorrelationCoefficient(zscores1, zscores2);

            //###########################################################################################
            // PROCESS SCORES
            //if (score1 > scoreThreshold) eventFound = true;
            //if ((score1 > scoreThreshold) || (score2 > scoreThreshold)) eventFound = true;
            //double score = (correlationScore * 0.3) + (peaksScore * 0.7);
            double score = (relativePeakScore * 0.4) + (peaksScore * 0.6);

            scores[0] = score;
            scores[1] = relativePeakScore;
            scores[2] = peaksScore;
            return(scores);
        }
Exemple #7
0
        public void TestFreqScaleOnArtificialSignal2()
        {
            // Generate a 30-second test signal containing five cosine harmonics,
            // convert it to a decibel octave-scale spectrogram, and verify both the
            // bins in which the spectral peaks are found and the final image size.
            const int sampleRate = 64000;
            const double signalDurationSeconds = 30;
            int[] harmonics = { 500, 1000, 2000, 4000, 8000 };

            var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
            var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png");
            var recording = DspFilters.GenerateTestRecording(sampleRate, signalDurationSeconds, harmonics, WaveType.Cosine);

            // default sonogram configuration for this frequency scale
            var sonoConfig = new SonogramConfig
            {
                WindowSize = freqScale.WindowSize,
                WindowOverlap = 0.2,
                SourceFName = "Signal2",
                NoiseReductionType = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
            sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

            // take one spectrum (row 40 — any row would do), smooth it, then locate its peaks
            var smoothedSpectrum = DataTools.filterMovingAverage(MatrixTools.GetRow(sonogram.Data, 40), 5);
            var peaks = DataTools.GetPeaks(smoothedSpectrum);

            var peakIds = new List<int>();
            for (int i = 5; i < peaks.Length - 5; i++)
            {
                if (!peaks[i])
                {
                    continue;
                }

                int peakId = freqScale.BinBounds[i, 0];
                peakIds.Add(peakId);
                LoggedConsole.WriteLine($"Spectral peak located in bin {peakId},  Herz={freqScale.BinBounds[i, 1]}");
            }

            foreach (int h in harmonics)
            {
                LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}");
            }

            // expect one detected peak per harmonic, at the expected octave-scale bin ids
            Assert.AreEqual(5, peakIds.Count);
            int[] expectedBins = { 129, 257, 513, 1025, 2049 };
            for (int p = 0; p < expectedBins.Length; p++)
            {
                Assert.AreEqual(expectedBins[p], peakIds[p]);
            }

            var image = sonogram.GetImage();
            string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)}   SR={sampleRate}  Window={freqScale.WindowSize}";
            image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations);
            image.Save(outputImagePath);

            // Check that image dimensions are correct
            Assert.AreEqual(146, image.Width);
            Assert.AreEqual(310, image.Height);
        }
Exemple #8
0
        /// <summary>
        /// Renders a spectrogram matrix (time x frequency) as a grey-scale image.
        /// When <paramref name="doHighlightSubband"/> is set, bins inside the
        /// [subBandMinHz, subBandMaxHz] band are given a green tinge.
        /// </summary>
        /// <param name="data">spectrogram matrix; dim 0 = spectra (columns), dim 1 = frequency bins.</param>
        /// <param name="nyquistFreq">nyquist frequency of the source signal.</param>
        /// <param name="maxFrequency">highest frequency to draw.</param>
        /// <param name="doMelScale">when true, sub-band bounds are converted to mel-scale positions.</param>
        /// <param name="binHeight">number of image rows drawn per frequency bin (used for cepstral images).</param>
        /// <param name="doHighlightSubband">whether to tint the sub-band.</param>
        /// <param name="subBandMinHz">lower bound of the highlighted sub-band.</param>
        /// <param name="subBandMaxHz">upper bound of the highlighted sub-band.</param>
        /// <returns>the rendered image, frequency increasing upwards.</returns>
        public static Image <Rgb24> GetSonogramImage(double[,] data, int nyquistFreq, int maxFrequency, bool doMelScale, int binHeight, bool doHighlightSubband, int subBandMinHz, int subBandMaxHz)
        {
            int width = data.GetLength(0);   // number of spectra in sonogram
            int fftBins = data.GetLength(1); // number of frequency bins
            int maxBin = (int)Math.Floor(fftBins * maxFrequency / (double)nyquistFreq);
            int imageHeight = maxBin * binHeight; // image ht = sonogram ht; grid/score scales added later

            // min/max/range for normalising dB values, with a small contrast stretch
            // to enhance the spectrogram slightly
            DataTools.MinMax(data, out double min, out double max);
            double range = max - min;
            const double fractionalStretching = 0.01;
            min += range * fractionalStretching;
            max -= range * fractionalStretching;
            range = max - min;

            // locate top and bottom of the highlighted sub-band in bin coordinates
            int minHighlightBin = (int)Math.Round(subBandMinHz / (double)nyquistFreq * fftBins);
            int maxHighlightBin = (int)Math.Round(subBandMaxHz / (double)nyquistFreq * fftBins);
            if (doMelScale)
            {
                double maxMel = MFCCStuff.Mel(nyquistFreq);
                int melRange = (int)(maxMel - 0 + 1);
                double pixelPerMel = imageHeight / (double)melRange;
                minHighlightBin = (int)Math.Round(MFCCStuff.Mel(subBandMinHz) * pixelPerMel);
                maxHighlightBin = (int)Math.Round(MFCCStuff.Mel(subBandMaxHz) * pixelPerMel);
            }

            Color[] grayScale = ImageTools.GrayScale();
            var bmp = new Image<Rgb24>(width, imageHeight);
            int yOffset = imageHeight; // rows are drawn bottom-up

            // over all frequency bins
            for (int y = 0; y < maxBin; y++)
            {
                // repeat each bin binHeight times (for cepstral images)
                for (int r = 0; r < binHeight; r++)
                {
                    // over all pixels in this row
                    for (int x = 0; x < width; x++)
                    {
                        // normalise to [0,1] then invert into the 0..255 grey range, clamped
                        double value = (data[x, y] - min) / range;
                        int c = 255 - (int)Math.Floor(255.0 * value);
                        c = c < 0 ? 0 : (c >= 256 ? 255 : c);

                        // green tinge used in the template scan band
                        int g = Math.Min(c + 40, 255);

                        bmp[x, yOffset - 1] = doHighlightSubband && IsInBand(y, minHighlightBin, maxHighlightBin)
                            ? Color.FromRgb((byte)c, (byte)g, (byte)c)
                            : grayScale[c];
                    }

                    yOffset--;
                } // end repeats over one track
            }

            return bmp;
        }
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// Recognizer for the frog Litoria caerulea: scores "croak" candidate spectra whose
        /// maximum lies within a band around the dominant frequency, then searches the
        /// resulting score array for oscillations caused by repeated croaks.
        /// </summary>
        /// <param name="recording">the audio segment to be analysed.</param>
        /// <param name="configuration">recognizer settings, read into a LitoriaCaeruleaConfig.</param>
        /// <param name="segmentStartOffset">offset of this segment from the start of the source recording.</param>
        /// <param name="getSpectralIndexes">lazily-computed spectral indices (not used by this recognizer).</param>
        /// <param name="outputDirectory">directory where debug images are written.</param>
        /// <param name="imageWidth">requested image width (not used by this recognizer).</param>
        /// <returns>sonogram, score plots and the pruned list of detected acoustic events.</returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            var recognizerConfig = new LitoriaCaeruleaConfig();

            recognizerConfig.ReadConfigFile(configuration);

            // common properties
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // BETTER TO SET THESE. IGNORE USER!
            // This framesize is large because the oscillation we wish to detect is due to repeated croaks
            // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
            const int    frameSize     = 2048;
            const double windowOverlap = 0.5;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),

                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.None
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));

            //int dominantFreqBin = (int)Math.Round(recognizerConfig.DominantFreq / freqBinWidth) + 1;
            int minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var decibelThreshold = 9.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            // get the freq band as set by min and max Herz
            var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
            // Skip over spectra where maximum is not in correct location.
            int buffer            = 150;
            var croakScoreArray   = new double[rowCount];
            var hzAtTopOfTopBand  = recognizerConfig.DominantFreq + buffer;
            var hzAtBotOfTopBand  = recognizerConfig.DominantFreq - buffer;
            var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
            var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

            // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
            for (int x = 0; x < rowCount; x++)
            {
                //extract spectrum
                var spectrum = MatrixTools.GetRow(frogBand, x);
                int maxIndex = DataTools.GetMaxIndex(spectrum);
                if (spectrum[maxIndex] < decibelThreshold)
                {
                    // spectrum too weak to be a croak candidate
                    continue;
                }

                if (maxIndex < binAtTopOfTopBand && maxIndex > binAtBotOfTopBand)
                {
                    croakScoreArray[x] = spectrum[maxIndex];
                }
            }

            // Prepare a normalised plot for later display with spectrogram
            double[] normalisedScores;
            double   normalisedThreshold;

            DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var text1      = string.Format($"Croak scores (threshold={decibelThreshold})");
            var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

            // extract potential croak events from the array of croak candidate
            var croakEvents = AcousticEvent.ConvertScoreArray2Events(
                croakScoreArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinCroakDuration,
                recognizerConfig.MaxCroakDuration,
                segmentStartOffset);

            // add necessary info into the candidate events
            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in croakEvents)
            {
                // add additional info
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // With those events that survive the above Array2Events process, we now extract a new array croak scores
            croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
            DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var text2      = string.Format($"Croak events (threshold={decibelThreshold})");
            var croakPlot2 = new Plot(text2, normalisedScores, normalisedThreshold);

            // Look for oscillations in the difference array
            // duration of DCT in seconds
            //croakScoreArray = DataTools.filterMovingAverageOdd(croakScoreArray, 5);
            double dctDuration = recognizerConfig.DctDuration;

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = recognizerConfig.DctThreshold;

            // oscillation rates are the reciprocals of the configured croak periods
            double minOscRate   = 1 / recognizerConfig.MaxPeriod;
            double maxOscRate   = 1 / recognizerConfig.MinPeriod;
            var    dctScores    = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);

            // ######################################################################
            // iii: CONVERT THE OSCILLATION SCORES INTO ACOUSTIC EVENTS
            var events = AcousticEvent.ConvertScoreArray2Events(
                dctScores,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            double[,] hits = null;
            prunedEvents   = new List <AcousticEvent>();
            foreach (var ae in events)
            {
                // add additional info
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // do a recognizer test.
            if (MainEntry.InDEBUG)
            {
                //TestTools.RecognizerScoresTest(scores, new FileInfo(recording.FilePath));
                //AcousticEvent.TestToCompareEvents(prunedEvents, new FileInfo(recording.FilePath));
            }

            var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);

            // NOTE(review): this branch is deliberately always-on so the debug
            // spectrogram is always produced; change the condition to disable it.
            if (true)
            {
                // display a variety of debug score arrays
                // calculate amplitude at location
                double[] amplitudeArray = MatrixTools.SumRows(frogBand);
                DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scoresPlot, croakPlot2, croakPlot1, amplPlot
                };

                // NOTE: This DrawDebugImage() method can be over-written in this class.
                var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
                var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                debugImage.Save(debugPath);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = scoresPlot.AsList(),
                Events = prunedEvents,

                //Events = events
            });
        }
        /// <summary>
        /// Calculates the spectral indices used for content description
        /// (OSC, ACI, ENT, BGN, PMN, EVN) from one recording segment.
        /// </summary>
        /// <param name="recording">the recording segment to analyse.</param>
        /// <param name="segmentOffsetTimeSpan">offset of this segment within the source recording.</param>
        /// <param name="sampleRateOfOriginalAudioFile">sample rate of the original audio, used to correct the nyquist after up-sampling.</param>
        /// <param name="returnSonogramInfo">retained for backward compatibility; not used.</param>
        /// <returns>the populated spectral index values.</returns>
        public static SpectralIndexValuesForContentDescription Analysis(
            AudioRecording recording,
            TimeSpan segmentOffsetTimeSpan,
            int sampleRateOfOriginalAudioFile,
            bool returnSonogramInfo = false)
        {
            double epsilon = recording.Epsilon;
            int sampleRate = recording.WavReader.SampleRate;
            var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds);

            // FRAME parameters: step equals frame size, i.e. zero window overlap
            int frameSize = ContentSignatures.FrameSize;
            int frameStep = frameSize;
            double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            var frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            // result structure and default index-calculation configuration
            var config = new IndexCalculateConfig(); // sets some default values
            int freqBinCount = frameSize / 2;
            var indexProperties = GetIndexProperties();
            var spectralIndices = new SpectralIndexValuesForContentDescription();

            // ################################## GET THE AMPLITUDE SPECTROGRAM
            // Note that the amplitude spectrogram has had the DC bin removed, i.e. has only 256 columns.
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
            var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

            // (B) ########## OSC spectral index, taken directly from the recording
            //     because it needs a different frame size etc.
            var sampleLength = Oscillations2014.DefaultSampleLength;
            var frameLength = Oscillations2014.DefaultFrameLength;
            var sensitivity = Oscillations2014.DefaultSensitivityThreshold;
            var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

            // double the vector length because a 256-element vector is wanted for spectrogram purposes
            spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);

            // (C) ########## indices from the AMPLITUDE spectrogram
            // IFF up-sampling was done, the bin of the original nyquist is below SR/2.
            // The original sample rate can be anything 11.0-44.1 kHz.
            int originalNyquist = sampleRateOfOriginalAudioFile / 2;
            if (dspOutput1.NyquistFreq > originalNyquist)
            {
                dspOutput1.NyquistFreq = originalNyquist;
                dspOutput1.NyquistBin = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // bin width does not change
            }

            // ACI: acoustic complexity index
            spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

            // ENT: temporal entropy spectrum H(t), reversed (1 - Ht) to express energy concentration
            double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
            for (int i = 0; i < temporalEntropySpectrum.Length; i++)
            {
                temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
            }

            spectralIndices.ENT = temporalEntropySpectrum;

            // (D) ########## indices from the DECIBEL spectrogram
            // BGN: convert amplitudes to decibels and take the background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
            spectralIndices.BGN = spectralDecibelBgn;

            // recompute the decibel spectrogram and noise-reduce it
            decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // PMN: noise-reduced average decibel spectrum
            spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

            // EVN: spectral events. decibelSpectrogram is now noise reduced, all values >= 0.0.
            // The low/mid band boundary keeps low-freq bins containing anthropogenic noise from
            // biasing the index values away from biophony.
            double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;
            int midFreqBound = config.MidFreqBound;
            int lowFreqBound = config.LowFreqBound;
            int lowerBinBound = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
            int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
            var spActivity = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);
            spectralIndices.EVN = spActivity.EventSpectrum;

            return spectralIndices;
        } // end calculation of Six Spectral Indices
Exemple #11
0
        /// <summary>
        /// Renders a time-reduced sonogram image whose width is the frame count divided
        /// by the given factor. Each output column shows the spectrum of the frame with
        /// the highest decibel energy within its sub-sampled window.
        /// </summary>
        /// <param name="factor">time-reduction factor.</param>
        /// <param name="drawGridLines">whether to overlay the 1000 Hz grid lines.</param>
        /// <returns>the rendered image, frequency increasing upwards.</returns>
        public Image <Rgb24> GetImage_ReducedSonogram(int factor, bool drawGridLines)
        {
            var data = this.Data;                // sonogram intensity values
            int frameCount = data.GetLength(0);  // number of spectra in sonogram
            int imageHeight = data.GetLength(1); // image ht = sonogram ht
            int imageWidth = frameCount / factor;
            int subSample = frameCount / imageWidth;

            // min/max/range for normalising dB values
            DataTools.MinMax(data, out double min, out double max);
            double range = max - min;

            var grayScale = ImageTools.GrayScale();

            // calculate locations of the 1000 Hz grid lines
            int herzInterval = 1000;
            int[] vScale = FrequencyScale.CreateLinearYaxis(herzInterval, this.NyquistFrequency, imageHeight);

            var bmp = new Image<Rgb24>(imageWidth, imageHeight);
            for (int w = 0; w < imageWidth; w++)
            {
                // find the max-energy frame within this window.
                // NOTE: uses DecibelsPerFrame (changed from LogEnergy on 30th March 2009).
                int start = w * subSample;
                int end = ((w + 1) * subSample) - 1;
                double maxE = -double.MaxValue;
                int maxId = 0;
                for (int x = start; x < end; x++)
                {
                    if (maxE < this.DecibelsPerFrame[x])
                    {
                        maxE = this.DecibelsPerFrame[x];
                        maxId = x;
                    }
                }

                // draw the spectrum of the max-energy frame, over all freq bins
                for (int y = 0; y < data.GetLength(1); y++)
                {
                    // normalise to [0,1] then invert into the 0..255 grey range, clamped
                    double value = (data[maxId, y] - min) / range;
                    int c = 255 - (int)Math.Floor(255.0 * value);
                    c = c < 0 ? 0 : (c >= 256 ? 255 : c);
                    bmp[w, imageHeight - y - 1] = grayScale[c];
                }

                if (drawGridLines)
                {
                    // NOTE(review): the original code selected a colour on (w % 2 == 0)
                    // but both branches were black, so a single constant is equivalent.
                    var gridCol = Color.Black;

                    // over all Y-axis pixels flagged as grid-line positions
                    for (int p = 0; p < vScale.Length; p++)
                    {
                        if (vScale[p] == 0)
                        {
                            continue;
                        }

                        bmp[w, imageHeight - p] = gridCol;
                    }
                }
            }

            return bmp;
        }
        /// <summary>
        /// Applies the feature-learning pipeline (patch extraction, feature transformation,
        /// temporal summarization) to a set of target (1-minute) recordings found in
        /// <paramref name="inputPath"/>, using a set of centroids previously learned by the
        /// feature-learning process. Writes per-segment similarity vectors and per-band
        /// feature-vector CSV files to <paramref name="outputPath"/>.
        /// </summary>
        /// <param name="config">Settings controlling framing, patching, pooling, and summarization.</param>
        /// <param name="allCentroids">One learned centroid matrix (as a jagged array) per frequency band.</param>
        /// <param name="inputPath">Directory containing the .wav recordings to process.</param>
        /// <param name="outputPath">Directory that receives the "SimilarityVectors" folder and the "FeatureVectors-i.csv" files.</param>
        public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config, List <double[][]> allCentroids,
                                                         string inputPath, string outputPath)
        {
            // All per-segment similarity vectors are written beneath this subdirectory.
            var           simVecDir     = Directory.CreateDirectory(Path.Combine(outputPath, "SimilarityVectors"));
            int           frameSize     = config.FrameSize;
            int           finalBinCount = config.FinalBinCount;
            FreqScaleType scaleType     = config.FrequencyScaleType;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // the duration of each frame (according to the default value (i.e., 1024) of frame size) is 0.04644 seconds
                // The question is how many single-frames (i.e., patch height is equal to 1) should be selected to form one second
                // The "WindowOverlap" is calculated to answer this question
                // each 24 single-frames duration is equal to 1 second
                // note that the "WindowOverlap" value should be recalculated if frame size is changed
                // this has not yet been considered in the Config file!
                WindowOverlap           = 0.10725204,
                DoMelScale              = (scaleType == FreqScaleType.Mel) ? true : false,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };
            // Frame advance in samples: frame size minus the overlapped fraction.
            double frameStep   = frameSize * (1 - settings.WindowOverlap);
            int    minFreqBin  = config.MinFreqBin;
            int    maxFreqBin  = config.MaxFreqBin;
            int    numFreqBand = config.NumFreqBand;
            // Width (in freq bins) of each band-patch; integer division truncates any remainder bins.
            int    patchWidth  =
                (maxFreqBin - minFreqBin + 1) / numFreqBand;
            int patchHeight = config.PatchHeight;

            // the number of frames that their feature vectors will be concatenated in order to preserve temporal information.
            int frameWindowLength = config.FrameWindowLength;

            // the step size to make a window of frames
            int stepSize = config.StepSize;

            // the factor of downsampling
            // NOTE(review): this local is read only at the numFrames calculation below; the
            // max-pooling call further down uses config.MaxPoolingFactor directly — confirm both
            // are intended to be the same value.
            int maxPoolingFactor = config.MaxPoolingFactor;

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            //*****
            // lists of features for all processing files
            // the key is the file name (plus segment index), and the value is the features for different bands
            Dictionary <string, List <double[, ]> > allFilesMinFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesMeanFeatureVectors     = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesMaxFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesStdFeatureVectors      = new Dictionary <string, List <double[, ]> >();
            Dictionary <string, List <double[, ]> > allFilesSkewnessFeatureVectors = new Dictionary <string, List <double[, ]> >();

            double[,] inputMatrix;
            // NOTE(review): this list is shared across files — when DoSegmentation is false the
            // recordings of every file accumulate here and earlier files' segments are
            // re-processed under the current file's name; confirm this is intended.
            List <AudioRecording> recordings = new List <AudioRecording>();

            // Top-level scan: only .wav files directly inside inputPath (non-recursive),
            // even though the emptiness check above looked at all subdirectories.
            foreach (string filePath in Directory.GetFiles(inputPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    if (config.DoSegmentation)
                    {
                        recordings = PatchSampling.GetSubsegmentsSamples(recording, config.SubsegmentDurationInSeconds, frameStep);
                    }
                    else
                    {
                        recordings.Add(recording);
                    }

                    // Process each (sub)segment of the current recording independently.
                    for (int s = 0; s < recordings.Count; s++)
                    {
                        string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + "-" + s.ToString() + ".csv");
                        var    amplitudeSpectrogram        = new AmplitudeSpectrogram(settings, recordings[s].WavReader);
                        var    decibelSpectrogram          = new DecibelSpectrogram(amplitudeSpectrogram);

                        // DO RMS NORMALIZATION
                        //sonogram.Data = SNR.RmsNormalization(sonogram.Data);

                        // DO NOISE REDUCTION
                        if (config.DoNoiseReduction)
                        {
                            decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);
                        }

                        // check whether the full band spectrogram is needed or a matrix with arbitrary freq bins
                        if (minFreqBin != 1 || maxFreqBin != finalBinCount)
                        {
                            inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(decibelSpectrogram.Data, minFreqBin, maxFreqBin);
                        }
                        else
                        {
                            inputMatrix = decibelSpectrogram.Data;
                        }

                        // creating matrices from different freq bands of the source spectrogram
                        List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
                        double[][,] matrices2 = allSubmatrices2.ToArray();
                        List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>();
                        for (int i = 0; i < matrices2.GetLength(0); i++)
                        {
                            // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
                            double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor);

                            int rows              = downsampledMatrix.GetLength(0);
                            int columns           = downsampledMatrix.GetLength(1);
                            // Take every non-overlapping patchHeight x patchWidth patch, in order.
                            var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
                            allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
                        }

                        // +++++++++++++++++++++++++++++++++++Feature Transformation
                        // to do the feature transformation, we normalize centroids and
                        // sequential patches from the input spectrogram to unit length
                        // Then, we calculate the dot product of each patch with the centroids' matrix

                        // NOTE(review): the .ToArray() calls inside these loops re-copy the lists on
                        // every access/iteration; indexing the lists directly would avoid the copies.
                        List <double[][]> allNormCentroids = new List <double[][]>();
                        for (int i = 0; i < allCentroids.Count; i++)
                        {
                            // double check the index of the list
                            double[][] normCentroids = new double[allCentroids.ToArray()[i].GetLength(0)][];
                            for (int j = 0; j < allCentroids.ToArray()[i].GetLength(0); j++)
                            {
                                normCentroids[j] = ART_2A.NormaliseVector(allCentroids.ToArray()[i][j]);
                            }

                            allNormCentroids.Add(normCentroids);
                        }

                        List <double[][]> allFeatureTransVectors = new List <double[][]>();

                        // processing the sequential patch matrix for each band
                        for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
                        {
                            List <double[]> featureTransVectors = new List <double[]>();
                            double[][]      similarityVectors   = new double[allSequentialPatchMatrix.ToArray()[i].GetLength(0)][];

                            for (int j = 0; j < allSequentialPatchMatrix.ToArray()[i].GetLength(0); j++)
                            {
                                // normalize each patch to unit length
                                var inputVector = allSequentialPatchMatrix.ToArray()[i].ToJagged()[j];
                                var normVector  = inputVector;

                                // to avoid vectors with NaN values, only normalize those that their norm is not equal to zero.
                                if (inputVector.Euclidean() != 0)
                                {
                                    normVector = ART_2A.NormaliseVector(inputVector);
                                }

                                // Similarity of this patch to every centroid of band i (dot products).
                                similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
                            }

                            Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix());

                            // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames
                            // using FrameWindowLength

                            // patchId refers to the patch id that has been processed so far according to the step size.
                            // if we want no overlap between different frame windows, then stepSize = frameWindowLength
                            int patchId = 0;
                            while (patchId + frameWindowLength - 1 < similarityVectors.GetLength(0))
                            {
                                List <double[]> patchGroup = new List <double[]>();
                                for (int k = 0; k < frameWindowLength; k++)
                                {
                                    patchGroup.Add(similarityVectors[k + patchId]);
                                }

                                featureTransVectors.Add(DataTools.ConcatenateVectors(patchGroup));
                                patchId = patchId + stepSize;
                            }

                            allFeatureTransVectors.Add(featureTransVectors.ToArray());
                        }

                        // +++++++++++++++++++++++++++++++++++Feature Transformation

                        // +++++++++++++++++++++++++++++++++++Temporal Summarization
                        // Based on the resolution to generate features, the "numFrames" parameter will be set.
                        // Each 24 single-frame patches form 1 second
                        // for each 24 patch, we generate 5 vectors of min, mean, std, and max (plus skewness from Accord.net)
                        // The pre-assumption is that each input recording is 1 minute long

                        // store features of different bands in lists
                        List <double[, ]> allMinFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allMeanFeatureVectors     = new List <double[, ]>();
                        List <double[, ]> allMaxFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allStdFeatureVectors      = new List <double[, ]>();
                        List <double[, ]> allSkewnessFeatureVectors = new List <double[, ]>();

                        // Each 24 frames form 1 second using WindowOverlap
                        // factors such as stepSize, and maxPoolingFactor should be considered in temporal summarization.
                        // NOTE(review): integer division — if patchHeight * stepSize * maxPoolingFactor
                        // exceeds 24 this yields 0 and the while-loop below never runs; confirm the
                        // config values guarantee a positive result.
                        int numFrames = 24 / (patchHeight * stepSize * maxPoolingFactor);

                        foreach (var freqBandFeature in allFeatureTransVectors)
                        {
                            List <double[]> minFeatureVectors      = new List <double[]>();
                            List <double[]> meanFeatureVectors     = new List <double[]>();
                            List <double[]> maxFeatureVectors      = new List <double[]>();
                            List <double[]> stdFeatureVectors      = new List <double[]>();
                            List <double[]> skewnessFeatureVectors = new List <double[]>();

                            int c = 0;
                            while (c + numFrames <= freqBandFeature.GetLength(0))
                            {
                                // First, make a list of patches that would be equal to the needed resolution (1 second, 60 second, etc.)
                                List <double[]> sequencesOfFramesList = new List <double[]>();
                                for (int i = c; i < c + numFrames; i++)
                                {
                                    sequencesOfFramesList.Add(freqBandFeature[i]);
                                }

                                List <double> min      = new List <double>();
                                List <double> mean     = new List <double>();
                                List <double> std      = new List <double>();
                                List <double> max      = new List <double>();
                                List <double> skewness = new List <double>();

                                double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                                // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
                                for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                                {
                                    // Gather column j across all frames of this window.
                                    double[] temp = new double[sequencesOfFrames.GetLength(0)];
                                    for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                                    {
                                        temp[k] = sequencesOfFrames[k, j];
                                    }

                                    min.Add(temp.GetMinValue());
                                    mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                                    std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                                    max.Add(temp.GetMaxValue());
                                    skewness.Add(temp.Skewness());
                                }

                                minFeatureVectors.Add(min.ToArray());
                                meanFeatureVectors.Add(mean.ToArray());
                                maxFeatureVectors.Add(max.ToArray());
                                stdFeatureVectors.Add(std.ToArray());
                                skewnessFeatureVectors.Add(skewness.ToArray());
                                c += numFrames;
                            }

                            // when (freqBandFeature.GetLength(0) % numFrames) != 0, it means there are a number of frames (< numFrames)
                            // (or the whole) at the end of the target recording , left unprocessed.
                            // this would be problematic when an the resolution to generate the feature vector is 1 min,
                            // but the the length of the target recording is a bit less than one min.
                            // NOTE(review): the first clause is redundant — "remainder > 1" already
                            // implies "remainder != 0". A remainder of exactly 1 frame is dropped.
                            if (freqBandFeature.GetLength(0) % numFrames != 0 && freqBandFeature.GetLength(0) % numFrames > 1)
                            {
                                // First, make a list of patches that would be less than the required resolution
                                List <double[]> sequencesOfFramesList = new List <double[]>();
                                int             unprocessedFrames     = freqBandFeature.GetLength(0) % numFrames;
                                for (int i = freqBandFeature.GetLength(0) - unprocessedFrames;
                                     i < freqBandFeature.GetLength(0);
                                     i++)
                                {
                                    sequencesOfFramesList.Add(freqBandFeature[i]);
                                }

                                List <double> min      = new List <double>();
                                List <double> mean     = new List <double>();
                                List <double> std      = new List <double>();
                                List <double> max      = new List <double>();
                                List <double> skewness = new List <double>();

                                double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

                                // Second, calculate mean, max, and standard deviation (plus skewness) of vectors element-wise
                                for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
                                {
                                    double[] temp = new double[sequencesOfFrames.GetLength(0)];
                                    for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                                    {
                                        temp[k] = sequencesOfFrames[k, j];
                                    }

                                    min.Add(temp.GetMinValue());
                                    mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                                    std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                                    max.Add(temp.GetMaxValue());
                                    skewness.Add(temp.Skewness());
                                }

                                minFeatureVectors.Add(min.ToArray());
                                meanFeatureVectors.Add(mean.ToArray());
                                maxFeatureVectors.Add(max.ToArray());
                                stdFeatureVectors.Add(std.ToArray());
                                skewnessFeatureVectors.Add(skewness.ToArray());
                            }

                            allMinFeatureVectors.Add(minFeatureVectors.ToArray().ToMatrix());
                            allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix());
                            allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix());
                            allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix());
                            allSkewnessFeatureVectors.Add(skewnessFeatureVectors.ToArray().ToMatrix());
                        }

                        //*****
                        // the keys of the following dictionaries contain file name
                        // and their values are a list<double[,]> which the list.count is
                        // the number of all subsegments for which features are extracted
                        // the number of freq bands defined as an user-defined parameter.
                        // the 2D-array is the feature vectors.
                        allFilesMinFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMinFeatureVectors);
                        allFilesMeanFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMeanFeatureVectors);
                        allFilesMaxFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allMaxFeatureVectors);
                        allFilesStdFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allStdFeatureVectors);
                        allFilesSkewnessFeatureVectors.Add(fileInfo.Name + "-" + s.ToString(), allSkewnessFeatureVectors);

                        // +++++++++++++++++++++++++++++++++++Temporal Summarization
                    }
                }
            }

            // ++++++++++++++++++++++++++++++++++Writing features to one file
            // First, concatenate mean, max, std for each second.
            // Then, write the features of each pre-defined frequency band into a separate CSV file.
            var filesName        = allFilesMeanFeatureVectors.Keys.ToArray();
            var minFeatures      = allFilesMinFeatureVectors.Values.ToArray();
            var meanFeatures     = allFilesMeanFeatureVectors.Values.ToArray();
            var maxFeatures      = allFilesMaxFeatureVectors.Values.ToArray();
            var stdFeatures      = allFilesStdFeatureVectors.Values.ToArray();
            var skewnessFeatures = allFilesSkewnessFeatureVectors.Values.ToArray();

            // The number of elements in the list shows the number of freq bands
            // the size of each element in the list shows the number of files processed to generate feature for.
            // the dimensions of the matrix shows the number of feature vectors generated for each file and the length of feature vector
            var allMins     = new List <double[][, ]>();
            var allMeans    = new List <double[][, ]>();
            var allMaxs     = new List <double[][, ]>();
            var allStds     = new List <double[][, ]>();
            var allSkewness = new List <double[][, ]>();

            // Transpose the per-file structure into a per-band structure:
            // looping over freq bands
            for (int i = 0; i < meanFeatures[0].Count; i++)
            {
                var mins       = new List <double[, ]>();
                var means      = new List <double[, ]>();
                var maxs       = new List <double[, ]>();
                var stds       = new List <double[, ]>();
                var skewnesses = new List <double[, ]>();

                // looping over all files
                for (int k = 0; k < meanFeatures.Length; k++)
                {
                    mins.Add(minFeatures[k].ToArray()[i]);
                    means.Add(meanFeatures[k].ToArray()[i]);
                    maxs.Add(maxFeatures[k].ToArray()[i]);
                    stds.Add(stdFeatures[k].ToArray()[i]);
                    skewnesses.Add(skewnessFeatures[k].ToArray()[i]);
                }

                allMins.Add(mins.ToArray());
                allMeans.Add(means.ToArray());
                allMaxs.Add(maxs.ToArray());
                allStds.Add(stds.ToArray());
                allSkewness.Add(skewnesses.ToArray());
            }

            // each element of meanFeatures array is a list of features for different frequency bands.
            // looping over the number of freq bands
            for (int i = 0; i < allMeans.ToArray().GetLength(0); i++)
            {
                // creating output feature file based on the number of freq bands
                var outputFeatureFile = Path.Combine(outputPath, "FeatureVectors-" + i.ToString() + ".csv");

                // creating the header for CSV file
                List <string> header = new List <string>();
                header.Add("file name");

                for (int j = 0; j < allMins.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("min" + j.ToString());
                }

                for (int j = 0; j < allMeans.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("mean" + j.ToString());
                }

                for (int j = 0; j < allMaxs.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("max" + j.ToString());
                }

                for (int j = 0; j < allStds.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("std" + j.ToString());
                }

                for (int j = 0; j < allSkewness.ToArray()[i][0].GetLength(1); j++)
                {
                    header.Add("skewness" + j.ToString());
                }

                var    csv     = new StringBuilder();
                string content = string.Empty;
                foreach (var entry in header.ToArray())
                {
                    // NOTE(review): every row (header and data) ends with a trailing comma —
                    // confirm downstream CSV readers tolerate the resulting empty last column.
                    content += entry.ToString() + ",";
                }

                csv.AppendLine(content);

                var allFilesFeatureVectors = new Dictionary <string, double[, ]>();

                // looping over files
                for (int j = 0; j < allMeans.ToArray()[i].GetLength(0); j++)
                {
                    // concatenating mean, std, and max vector together for the pre-defined resolution
                    List <double[]> featureVectors = new List <double[]>();
                    for (int k = 0; k < allMeans.ToArray()[i][j].ToJagged().GetLength(0); k++)
                    {
                        // Row k of the output = [min | mean | max | std | skewness] concatenated.
                        List <double[]> featureList = new List <double[]>
                        {
                            allMins.ToArray()[i][j].ToJagged()[k],
                                        allMeans.ToArray()[i][j].ToJagged()[k],
                                        allMaxs.ToArray()[i][j].ToJagged()[k],
                                        allStds.ToArray()[i][j].ToJagged()[k],
                                        allSkewness.ToArray()[i][j].ToJagged()[k],
                        };
                        double[] featureVector = DataTools.ConcatenateVectors(featureList);
                        featureVectors.Add(featureVector);
                    }

                    allFilesFeatureVectors.Add(filesName[j], featureVectors.ToArray().ToMatrix());
                }

                // writing feature vectors to CSV file
                foreach (var entry in allFilesFeatureVectors)
                {
                    content  = string.Empty;
                    content += entry.Key.ToString() + ",";
                    // NOTE(review): numeric values are written with the current culture's
                    // default double.ToString() — confirm invariant-culture formatting is not required.
                    foreach (var cent in entry.Value)
                    {
                        content += cent.ToString() + ",";
                    }

                    csv.AppendLine(content);
                }

                File.WriteAllText(outputFeatureFile, csv.ToString());
            }
        }
Exemple #13
0
 /// <summary>
 /// Renders the friend-links ("友情链接") page.
 /// </summary>
 /// <param name="page">Requested page number (currently unused by this action).</param>
 /// <param name="row">Rows per page (currently unused by this action).</param>
 /// <returns>The "Friends" view produced by <c>EnhancedView</c>.</returns>
 public IActionResult Friends(int page = 1, int row = 10)
 {
     // Title for the rendered page itself.
     ViewBag.Title = DataTools.MakeWebTitle("友情链接");

     // Also expose the title via a custom response header.
     var headerTitle = DataTools.MakeWebTitle("友情链接", true);
     HttpContext.Response.Headers.Add("title", headerTitle);

     return EnhancedView("Friends");
 }
Exemple #14
0
        } //Execute()

        public static Output GetInstanceRepresentations(Arguments arguments)
        {
            LoggedConsole.WriteLine("1. Read in all Instances and do feature extraction");

            //################################### FEATURE WEIGHTS
            //TRY DIFFERENT WEIGHTINGS assuming following "SPT,RHZ,RVT,RPS,RNG";
            bool doDeltaFeatures = false;

            double[] weights      = { 1.0, 1.0, 0.8, 0.7, 0.7 };
            double[] deltaWeights = { 1.0, 1.0, 0.8, 0.7, 0.7, 0.5, 0.4, 0.4, 0.2, 0.2 };
            if (doDeltaFeatures)
            {
                weights = deltaWeights;
            }

            //MAX-POOLING for SPECTRAL REDUCTION
            // frequency bins used to reduce dimensionality of the 256 spectral values.
            int startBin  = 8;
            int maxOf2Bin = 117;
            int maxOf3Bin = 160;
            int endBin    = 200;

            double[] testArray = new double[256];
            for (int i = 0; i < testArray.Length; i++)
            {
                testArray[i] = i;
            }

            double[] reducedArray          = MaxPoolingLimited(testArray, startBin, maxOf2Bin, maxOf3Bin, endBin);
            int      reducedSpectralLength = reducedArray.Length;

            LoggedConsole.WriteLine("     Reduced spectral length = " + reducedSpectralLength);
            int instanceCount = arguments.InstanceCount;
            int speciesCount  = arguments.SpeciesCount;

            // READ IN THE SPECIES LABELS FILE AND SET UP THE DATA
            string[] fileID    = new string[instanceCount];
            int[]    speciesID = new int[speciesCount];
            ReadGlotinsSpeciesLabelFile(arguments.SpeciesLabelsFile, instanceCount, out fileID, out speciesID);

            // INIT array of species counts
            int[] instanceNumbersPerSpecies = new int[speciesCount];

            // INIT array of frame counts
            int[] frameNumbersPerInstance = new int[instanceCount];

            // initialise species description matrix
            var keyArray = FEATURE_KEYS.Split(',');

            int totalFeatureCount = keyArray.Length * reducedArray.Length;

            Console.WriteLine("    Total Feature Count = " + totalFeatureCount);

            if (doDeltaFeatures)
            {
                totalFeatureCount *= 2;
                LoggedConsole.WriteLine("    Total Delta Feature Count = " + totalFeatureCount);
            }

            // one matrix row per species
            double[,] instanceFeatureMatrix = new double[instanceCount, totalFeatureCount];

            // loop through all all instances
            for (int j = 0; j < instanceCount; j++)
            {
                LoggedConsole.Write(".");
                int frameCount = 0;

                // get the spectral index files
                int speciesLabel = speciesID[j];

                // dictionary to store feature spectra for instance.
                var aggreDictionary = new Dictionary <string, double[]>();

                // dictionary to store delta spectra for instance.
                var deltaDictionary = new Dictionary <string, double[]>();

                foreach (string key in keyArray)
                {
                    string   name = string.Format("{0}_Species{1:d2}.{2}.csv", fileID[j], speciesLabel, key);
                    FileInfo file = new FileInfo(Path.Combine(arguments.InputDataDirectory.FullName, name));

                    if (file.Exists)
                    {
                        int binCount;
                        double[,] matrix = IndexMatrices.ReadSpectrogram(file, out binCount);

                        // create or get the array of spectral values.
                        double[] aggregateArray = new double[reducedSpectralLength];
                        double[] deltaArray     = new double[reducedSpectralLength];

                        double[] ipVector = MatrixTools.GetRow(matrix, 0);
                        ipVector     = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
                        reducedArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);
                        double[] previousArray = reducedArray;

                        // transfer spectral values to array.
                        int rowCount = matrix.GetLength(0);

                        //rowCount = (int)Math.Round(rowCount * 0.99); // ###################### USE ONLY 99% of instance
                        //if (rowCount > 1200) rowCount = 1200;
                        for (int r = 1; r < rowCount; r++)
                        {
                            ipVector     = MatrixTools.GetRow(matrix, r);
                            ipVector     = DataTools.SubtractValueAndTruncateToZero(ipVector, arguments.BgnThreshold);
                            reducedArray = MaxPoolingLimited(ipVector, startBin, maxOf2Bin, maxOf3Bin, endBin);

                            for (int c = 0; c < reducedSpectralLength; c++)
                            {
                                aggregateArray[c] += reducedArray[c];

                                // Calculate the DELTA values TWO OPTIONS ##################################################
                                double delta = Math.Abs(reducedArray[c] - previousArray[c]);

                                //double delta = reducedArray[c] - previousArray[c];
                                //if (delta < 0.0)  delta = 0.0;
                                //double delta = previousArray[c]; //previous array - i.e. do not calculate delta
                                deltaArray[c] += delta;
                            }

                            previousArray = reducedArray;
                        }

                        aggreDictionary[key] = aggregateArray;
                        deltaDictionary[key] = deltaArray;
                        frameCount           = rowCount;
                    } //if (file.Exists)
                }     //foreach (string key in keyArray)

                instanceNumbersPerSpecies[speciesLabel - 1]++;
                frameNumbersPerInstance[j] += frameCount;

                // create the matrix of instance descriptions which consists of concatenated vectors
                // j = index of instance ID = row number
                int featureID = 0;
                foreach (string key in keyArray)
                {
                    int featureOffset = featureID * reducedSpectralLength;
                    for (int c = 0; c < reducedSpectralLength; c++)
                    {
                        // TWO OPTIONS: SUM OR AVERAGE ######################################
                        //instanceFeatureMatrix[j, featureOffset + c] = dictionary[key][c];
                        instanceFeatureMatrix[j, featureOffset + c] = aggreDictionary[key][c] / frameCount;
                    }

                    featureID++;
                }

                if (doDeltaFeatures)
                {
                    foreach (string key in keyArray)
                    {
                        int featureOffset = featureID * reducedSpectralLength;
                        for (int c = 0; c < reducedSpectralLength; c++)
                        {
                            // TWO OPTIONS: SUM OR AVERAGE ######################################
                            //instanceFeatureMatrix[j, featureOffset + c] = dictionary[key][c];
                            instanceFeatureMatrix[j, featureOffset + c] = deltaDictionary[key][c] / frameCount;
                        }

                        featureID++;
                    }
                } // if doDeltaFeatures
            }     // end for loop j over all instances

            LoggedConsole.WriteLine("Done!");

            LoggedConsole.WriteLine("\nSum of species number array = " + instanceNumbersPerSpecies.Sum());
            LoggedConsole.WriteLine("Sum of  frame  number array = " + frameNumbersPerInstance.Sum());
            bool   addLineNumbers            = true;
            string countsArrayOutputFilePath = Path.Combine(arguments.OutputDirectory.FullName, "BirdClef50_training_Counts.txt");

            FileTools.WriteArray2File(instanceNumbersPerSpecies, addLineNumbers, countsArrayOutputFilePath);

            // Initialise output data arrays
            Output output = new Output();

            output.FileID    = fileID;
            output.SpeciesID = speciesID;
            output.InstanceNumbersPerSpecies = instanceNumbersPerSpecies;
            output.ReducedSpectralLength     = reducedSpectralLength;

            // INIT array of frame counts
            output.FrameNumbersPerInstance = frameNumbersPerInstance;

            // matrix: each row= one instance;  each column = one feature
            output.InstanceFeatureMatrix = instanceFeatureMatrix;

            output.Weights = weights;

            return(output);
        } // GetInstanceRepresentations()
        /// <summary>
        /// Remove events whose acoustic profile does not match that of a flying fox.
        /// Applies three spectral tests to the averaged spectrum taken from the centre of each event.
        /// </summary>
        /// <param name="events">unfiltered acoustic events.</param>
        /// <param name="sonogram">includes matrix of spectrogram values.</param>
        /// <returns>filtered acoustic events.</returns>
        private static List <AcousticEvent> FilterEventsForSpectralProfile(List <AcousticEvent> events, BaseSonogram sonogram)
        {
            double[,] data = sonogram.Data;

            // Frequency bins demarcating the bands used in the spectral tests below.
            // The hertz values are hard coded but could be included in the config.yml file.
            int topBin  = (int)Math.Round(8000 / sonogram.FBinWidth);
            int bin4kHz = (int)Math.Round(4000 / sonogram.FBinWidth);
            int bin1kHz = (int)Math.Round(1000 / sonogram.FBinWidth);

            var accepted = new List <AcousticEvent>();

            foreach (AcousticEvent ev in events)
            {
                int firstFrame = ev.Oblong.RowTop;

                // Take only a few frames from the centre of the event (not its full duration)
                // and average them into a single spectrum, smoothed with a moving-average filter.
                var centreFrames = DataTools.Submatrix(data, firstFrame + 1, 0, firstFrame + 4, topBin);
                var avSpectrum   = MatrixTools.GetColumnAverages(centreFrames);
                var spectrum     = DataTools.normalise(avSpectrum);
                spectrum = DataTools.filterMovingAverageOdd(spectrum, 11);
                int peakBin = DataTools.GetMaxIndex(spectrum);

                // Test 1: the spectral maximum should lie below 4 kHz.
                bool passTest1 = peakBin < bin4kHz;

                // Test 2: there should be little energy in the 0-1 kHz band.
                // Reference ratios: 0.125 = 1/8 (band energy below the 8-band average),
                // 0.0938 = 3/32 (below 3/4 of the average), 0.0625 = 1/16 (below half the average).
                double totalArea   = spectrum.Sum();
                double lowBandArea = DataTools.Subarray(spectrum, 0, bin1kHz).Sum();
                bool   passTest2   = !(lowBandArea / totalArea > 0.1);

                // Test 3: there should be little energy in the 4-5 kHz band.
                double midBandArea = DataTools.Subarray(spectrum, bin4kHz, bin1kHz).Sum();
                bool   passTest3   = !(midBandArea / totalArea > 0.125);

                // TODO write method to determine similarity of spectrum to a true flying fox spectrum.
                // Problem: it is not certain how variable the FF spectra are.
                // In ten minutes of recording used so far, which include 14-15 obvious calls, there appear to be two spectral types.
                // One type has three peaks at around 1.5 kHz, 3 kHz and 6 kHz.
                // The other type have two peaks around 2.5 and 5.5 kHz.

                if (passTest1 && passTest2 && passTest3)
                {
                    accepted.Add(ev);

                    //DEBUG SPECTRAL PROFILES: UNCOMMENT following lines to get spectral profiles of the events.

                    /*
                     * double startSecond = ev.EventStartSeconds - ev.SegmentStartSeconds;
                     * string name = "CallSpectrum " + (ev.SegmentStartSeconds / 60) + "m" + (int)Math.Floor(startSecond) + "s hzMax" + (int)Math.Ceiling(peakBin * sonogram.FBinWidth);
                     * var bmp2 = GraphsAndCharts.DrawGraph(name, spectrum, 100);
                     * bmp2.Save(Path.Combine(@"PATH\Towsey.PteropusSpecies", name + ".png"));
                     */
                }
            }

            return(accepted);
        }
Exemple #16
0
        } //PruneClusters2()

        /// <summary>
        /// Returns a similarity score between 0 and 1, computed as
        /// one minus the fractional Hamming distance between the two vectors.
        /// </summary>
        public static double HammingSimilarity(double[] v1, double[] v2)
        {
            double fractionalDistance = DataTools.HammingDistance(v1, v2) / (double)v1.Length;

            return(1 - fractionalDistance);
        }
        /// <summary>
        /// This method does the work: it searches a low-frequency band of the spectrogram
        /// for wing-beat events using the oscillation detector (Oscillations2012).
        /// </summary>
        /// <param name="audioRecording">the recording.</param>
        /// <param name="configuration">the config file.</param>
        /// <param name="profileName">name of call/event type to be found.</param>
        /// <param name="segmentStartOffset">where one segment is located in the total recording.</param>
        /// <returns>a list of events.</returns>
        private static RecognizerResults WingBeats(AudioRecording audioRecording, Config configuration, string profileName, TimeSpan segmentStartOffset)
        {
            ConfigFile.TryGetProfile(configuration, profileName, out var profile);

            // get the common properties
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "Pteropus species";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "Pteropus";

            // The following parameters worked well on a ten minute recording containing 14-16 calls.
            // Note: if you lower the dB threshold, you need to increase maxDurationSeconds
            int    minHz = profile.GetIntOrNull(AnalysisKeys.MinHz) ?? 100;
            int    maxHz = profile.GetIntOrNull(AnalysisKeys.MaxHz) ?? 3000;
            double minDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MinDuration) ?? 1.0;
            double maxDurationSeconds = profile.GetDoubleOrNull(AnalysisKeys.MaxDuration) ?? 10.0;
            double decibelThreshold   = profile.GetDoubleOrNull("DecibelThreshold") ?? 6.0;
            double dctDuration        = profile.GetDoubleOrNull("DctDuration") ?? 1.0;
            double dctThreshold       = profile.GetDoubleOrNull("DctThreshold") ?? 0.5;
            double minOscFreq         = profile.GetDoubleOrNull("MinOscilFreq") ?? 4.0;
            double maxOscFreq         = profile.GetDoubleOrNull("MaxOscilFreq") ?? 6.0;
            double eventThreshold     = profile.GetDoubleOrNull("EventThreshold") ?? 0.3;

            //######################

            //2. Don't use samples in this recognizer.
            //var samples = audioRecording.WavReader.Samples;
            //Instead, convert each segment to a spectrogram.
            var sonogram     = GetSonogram(configuration, audioRecording);

            // Average dB intensity in the target band; used only for the plots below.
            var decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, minHz, maxHz, sonogram.NyquistFrequency);

            // Look for wing beats using oscillation detector

            /*
             * int scoreSmoothingWindow = 11; // sets a default that was good for Cane toad
             * Oscillations2019.Execute(
             * (SpectrogramStandard)sonogram,
             *  minHz,
             *  maxHz,
             *  decibelThreshold,
             *  dctDuration,
             *  (int)Math.Floor(minOscFreq),
             *  (int)Math.Floor(maxOscFreq),
             *  dctThreshold,
             *  eventThreshold,
             *  minDurationSeconds,
             *  maxDurationSeconds,
             *  scoreSmoothingWindow,
             *  out var scores,
             *  out var acousticEvents,
             *  //out var hits,
             *  segmentStartOffset);
             */
            // NOTE(review): decibelThreshold is NOT passed to Oscillations2012 (see commented
            // argument below) — in this path it is used only for plot normalisation further down.
            Oscillations2012.Execute(
                (SpectrogramStandard)sonogram,
                minHz,
                maxHz,

                //decibelThreshold,
                dctDuration,
                (int)Math.Floor(minOscFreq),
                (int)Math.Floor(maxOscFreq),
                dctThreshold,
                eventThreshold,
                minDurationSeconds,
                maxDurationSeconds,
                out var scores,
                out var acousticEvents,
                out var hits,
                segmentStartOffset);

            // prepare plots
            // Intensity is normalised against 3x the dB threshold so the threshold line sits at 1/3 of plot height.
            double intensityNormalisationMax = 3 * decibelThreshold;
            var    normThreshold             = decibelThreshold / intensityNormalisationMax;
            var    normalisedIntensityArray  = DataTools.NormaliseInZeroOne(decibelArray, 0, intensityNormalisationMax);
            var    plot1 = new Plot(speciesName + " Wing-beat band", normalisedIntensityArray, normThreshold);
            var    plot2 = new Plot(speciesName + " Wing-beat Osc Score", scores, eventThreshold);
            var    plots = new List <Plot> {
                plot1, plot2
            };

            // ######################################################################

            // add additional information about the recording and sonogram properties from which the event is derived.
            acousticEvents.ForEach(ae =>
            {
                ae.FileName               = audioRecording.BaseName;
                ae.SpeciesName            = speciesName;
                ae.Name                   = abbreviatedSpeciesName + profileName;
                ae.Profile                = profileName;
                ae.SegmentDurationSeconds = audioRecording.Duration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                var frameOffset           = sonogram.FrameStep;
                var frameDuration         = sonogram.FrameDuration;
                ae.SetTimeAndFreqScales(frameOffset, frameDuration, sonogram.FBinWidth);

                //UNCOMMENT following lines to get spectral profiles of the Wingbeat events.

                /*    double[,] spectrogramData = sonogram.Data;
                 *  int maxBin = (int)Math.Round(8000 / sonogram.FBinWidth);
                 *  double startSecond = ae.EventStartSeconds - ae.SegmentStartSeconds;
                 *  int startFrame = (int)Math.Round(startSecond / sonogram.FrameStep);
                 *  int frameLength = (int)Math.Round(ae.EventDurationSeconds / sonogram.FrameStep);
                 *  int endFrame = startFrame + frameLength;
                 *
                 *  // get only the frames from centre of the acoustic event
                 *  var subMatrix = DataTools.Submatrix(spectrogramData, startFrame + 10, 0, endFrame - 10, maxBin);
                 *  var spectrum = MatrixTools.GetColumnAverages(subMatrix);
                 *  var normalisedSpectrum = DataTools.normalise(spectrum);
                 *  normalisedSpectrum = DataTools.filterMovingAverageOdd(normalisedSpectrum, 11);
                 *  var maxId = DataTools.GetMaxIndex(normalisedSpectrum);
                 *  var hzMax = (int)Math.Ceiling(maxId * sonogram.FBinWidth);
                 *  string name = "BeatSpectrum " + (ae.SegmentStartSeconds / 60) + "m" + (int)Math.Floor(startSecond) + "s hzMax" + hzMax;
                 *  var bmp2 = GraphsAndCharts.DrawGraph(name, normalisedSpectrum, 100);
                 *
                 *  //Set required path
                 *  bmp2.Save(Path.Combine(@"C:\PATH", name + ".png"));
                 */
            });

            return(new RecognizerResults()
            {
                Events = acousticEvents,
                Hits = null,
                ScoreTrack = null,
                Plots = plots,
                Sonogram = sonogram,
            });
        }
Exemple #18
0
        /// <summary>
        /// Trains the net. Repeatedly presents the training set (in randomised order when
        /// RandomiseTrnSetOrder is set) until either every signal wins the same F2/output node
        /// as on the previous iteration (training set learned) or maxIter is reached.
        /// Committed output nodes that record no wins in an iteration are uncommitted.
        /// </summary>
        /// <param name="trainingData">the input signal vectors.</param>
        /// <param name="seed">seed for randomising the presentation order of the training set.</param>
        /// <param name="initialWtCount">number of training vectors used to initialise the weight arrays.</param>
        /// <returns>tuple of (iterations used, committed F2 node count, winning node per signal, weight vectors).</returns>
        public Tuple <int, int, int[], List <double[]> > TrainNet(List <double[]> trainingData, int maxIter, int seed, int initialWtCount)
        {
            int dataSetSize = trainingData.Count;

            int[] randomArray = RandomNumber.RandomizeNumberOrder(dataSetSize, seed); //randomize order of trn set

            // bool skippedBecauseFull;
            int[] inputCategory = new int[dataSetSize]; //stores the winning OP node for each current  input signal
            int[] prevCategory  = new int[dataSetSize]; //stores the winning OP node for each previous input signal
            this.InitialiseWtArrays(trainingData, randomArray, initialWtCount);

            //{********* GO THROUGH THE TRAINING SET for 1 to MAX ITERATIONS *********}
            //repeat //{training set until max iter or trn set learned}
            int[] opNodeWins      = null;  //stores the number of times each OP node wins
            int   iterNum         = 0;
            bool  trainSetLearned = false; //     : boolean;

            while (!trainSetLearned && iterNum < maxIter)
            {
                iterNum++;
                opNodeWins = new int[this.OPSize];      //stores the number of times each OP node wins

                //initialise convergence criteria.  Want stable F2node allocations
                trainSetLearned = true;
                int changedCategory = 0;

                //{READ AND PROCESS signals until end of the data file}
                for (int sigNum = 0; sigNum < dataSetSize; sigNum++)
                {
                    //select an input signal. Later use sigID to enable test of convergence
                    int sigID = sigNum; // do signals in order
                    if (RandomiseTrnSetOrder)
                    {
                        sigID = randomArray[sigNum]; //pick at random
                    }

                    //{*********** PASS ONE INPUT SIGNAL THROUGH THE NETWORK ***********}
                    double[] OP        = this.PropagateIP2OP(trainingData[sigID]); //output = AND divided by OR of two vectors
                    int      index     = DataTools.GetMaxIndex(OP);
                    double   winningOP = OP[index];

                    //create new category if similarity OP of best matching node is too low
                    if (winningOP < this.VigilanceRho)
                    {
                        this.ChangeWtsOfFirstUncommittedNode(trainingData[sigID]);
                    }

                    inputCategory[sigID] = index; //winning F2 node for current input
                    opNodeWins[index]++;

                    //{test if training set is learned ie each signal is classified to the same F2 node as previous iteration}
                    if (inputCategory[sigID] != prevCategory[sigID])
                    {
                        trainSetLearned = false;
                        changedCategory++;
                    }
                } //end loop over all signal inputs

                //set the previous categories
                for (int x = 0; x < dataSetSize; x++)
                {
                    prevCategory[x] = inputCategory[x];
                }

                //remove committed F2 nodes that are not having wins
                for (int j = 0; j < this.OPSize; j++)
                {
                    if (this.committedNode[j] && opNodeWins[j] == 0)
                    {
                        this.committedNode[j] = false;
                    }
                }

                if (Verbose)
                {
                    LoggedConsole.WriteLine(" iter={0:D2}  committed=" + this.CountCommittedF2Nodes() + "\t changedCategory=" + changedCategory, iterNum);
                }

                // redundant with the while-condition but makes the exit point explicit
                if (trainSetLearned)
                {
                    break;
                }
            } //end of while (! trainSetLearned or (iterNum < maxIter) or terminate);

            return(Tuple.Create(iterNum, this.CountCommittedF2Nodes(), inputCategory, this.wts));
        } //TrainNet()
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="audioRecording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording audioRecording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            const double minAmplitudeThreshold = 0.1;
            const int    percentile            = 5;
            const double scoreThreshold        = 0.3;
            const bool   doFiltering           = true;
            const int    windowWidth           = 1024;
            const int    signalBuffer          = windowWidth * 2;

            //string path = @"C:\SensorNetworks\WavFiles\Freshwater\savedfortest.wav";
            //audioRecording.Save(path); // this does not work
            int sr      = audioRecording.SampleRate;
            int nyquist = audioRecording.Nyquist;

            // Get a value from the config file - with a backup default
            //int minHz = (int?)configuration[AnalysisKeys.MinHz] ?? 600;

            // Get a value from the config file - with no default, throw an exception if value is not present
            //int maxHz = ((int?)configuration[AnalysisKeys.MaxHz]).Value;

            // Get a value from the config file - without a string accessor, as a double
            //double someExampleSettingA = (double?)configuration.someExampleSettingA ?? 0.0;

            // common properties
            //string speciesName = (string)configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            //string abbreviatedSpeciesName = (string)configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // min score for an acceptable event
            double eventThreshold = (double)configuration.GetDoubleOrNull(AnalysisKeys.EventThreshold);

            // get samples
            var samples = audioRecording.WavReader.Samples;

            double[] bandPassFilteredSignal = null;

            if (doFiltering)
            {
                // high pass filter
                int      windowLength = 71;
                double[] highPassFilteredSignal;
                DSP_IIRFilter.ApplyMovingAvHighPassFilter(samples, windowLength, out highPassFilteredSignal);

                //DSP_IIRFilter filter2 = new DSP_IIRFilter("Chebyshev_Highpass_400");
                //int order2 = filter2.order;
                //filter2.ApplyIIRFilter(samples, out highPassFilteredSignal);

                // Amplify 40dB and clip to +/-1.0;
                double factor = 100; // equiv to 20dB
                highPassFilteredSignal = DspFilters.AmplifyAndClip(highPassFilteredSignal, factor);

                //low pass filter
                string        filterName = "Chebyshev_Lowpass_5000, scale*5";
                DSP_IIRFilter filter     = new DSP_IIRFilter(filterName);
                int           order      = filter.order;

                //System.LoggedConsole.WriteLine("\nTest " + filterName + ", order=" + order);
                filter.ApplyIIRFilter(highPassFilteredSignal, out bandPassFilteredSignal);
            }
            else // do not filter because already filtered - using Chris's filtered recording
            {
                bandPassFilteredSignal = samples;
            }

            // calculate an amplitude threshold that is above Nth percentile of amplitudes in the subsample
            int[]  histogramOfAmplitudes;
            double minAmplitude;
            double maxAmplitude;
            double binWidth;
            int    window = 66;

            Histogram.GetHistogramOfWaveAmplitudes(bandPassFilteredSignal, window, out histogramOfAmplitudes, out minAmplitude, out maxAmplitude, out binWidth);
            int percentileBin = Histogram.GetPercentileBin(histogramOfAmplitudes, percentile);

            double amplitudeThreshold = (percentileBin + 1) * binWidth;

            if (amplitudeThreshold < minAmplitudeThreshold)
            {
                amplitudeThreshold = minAmplitudeThreshold;
            }

            bool doAnalysisOfKnownExamples = true;

            if (doAnalysisOfKnownExamples)
            {
                // go to fixed location to check
                //1:02.07, 1:07.67, 1:12.27, 1:12.42, 1:12.59, 1:12.8, 1.34.3, 1:35.3, 1:40.16, 1:50.0, 2:05.9, 2:06.62, 2:17.57, 2:21.0
                //2:26.33, 2:43.07, 2:43.15, 3:16.55, 3:35.09, 4:22.44, 4:29.9, 4:42.6, 4:51.48, 5:01.8, 5:21.15, 5:22.72, 5:32.37, 5.36.1,
                //5:42.82, 6:03.5, 6:19.93, 6:21.55, 6:42.0, 6:42.15, 6:46.44, 7:12.17, 7:42.65, 7:45.86, 7:46.18, 7:52.38, 7:59.11, 8:10.63,
                //8:14.4, 8:14.63, 8_15_240, 8_46_590, 8_56_590, 9_25_77, 9_28_94, 9_30_5, 9_43_9, 10_03_19, 10_24_26, 10_24_36, 10_38_8,
                //10_41_08, 10_50_9, 11_05_13, 11_08_63, 11_44_66, 11_50_36, 11_51_2, 12_04_93, 12_10_05, 12_20_78, 12_27_0, 12_38_5,
                //13_02_25, 13_08_18, 13_12_8, 13_25_24, 13_36_0, 13_50_4, 13_51_2, 13_57_87, 14_15_00, 15_09_74, 15_12_14, 15_25_79

                //double[] times = { 2.2, 26.589, 29.62 };
                //double[] times = { 2.2, 3.68, 10.83, 24.95, 26.589, 27.2, 29.62 };
                //double[] times = { 2.2, 3.68, 10.83, 24.95, 26.589, 27.2, 29.62, 31.39, 62.1, 67.67, 72.27, 72.42, 72.59, 72.8, 94.3, 95.3,
                //                   100.16, 110.0, 125.9, 126.62, 137.57, 141.0, 146.33, 163.07, 163.17, 196.55, 215.09, 262.44, 269.9, 282.6,
                //                   291.48, 301.85, 321.18, 322.72, 332.37, 336.1, 342.82, 363.5, 379.93, 381.55, 402.0, 402.15, 406.44, 432.17,
                //                   462.65, 465.86, 466.18, 472.38, 479.14, 490.63, 494.4, 494.63, 495.240, 526.590, 536.590, 565.82, 568.94,
                //                   570.5, 583.9, 603.19, 624.26, 624.36, 638.8, 641.08, 650.9, 65.13, 68.63, 704.66,
                //                   710.36, 711.2, 724.93, 730.05, 740.78, 747.05, 758.5, 782.25, 788.18, 792.8,
                //                   805.24, 816.03, 830.4, 831.2, 837.87, 855.02, 909.74, 912.14, 925.81  };

                var filePath = new FileInfo(@"C:\SensorNetworks\WavFiles\Freshwater\GruntSummaryRevisedAndEditedByMichael.csv");
                List <CatFishCallData> data = Csv.ReadFromCsv <CatFishCallData>(filePath, true).ToList();

                //var catFishCallDatas = data as IList<CatFishCallData> ?? data.ToList();
                int count = data.Count();

                var subSamplesDirectory = outputDirectory.CreateSubdirectory("testSubsamples_5000LPFilter");

                //for (int t = 0; t < times.Length; t++)
                foreach (var fishCall in data)
                {
                    //Image bmp1 = IctalurusFurcatus.AnalyseLocation(bandPassFilteredSignal, sr, times[t], windowWidth);

                    // use following line where using time in seconds
                    //int location = (int)Math.Round(times[t] * sr); //assume location points to start of grunt
                    //double[] subsample = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 2 * signalBuffer);

                    // use following line where using sample
                    int location1 = fishCall.Sample / 2;                        //assume Chris's sample location points to centre of grunt. Divide by 2 because original recording was 44100.
                    int location  = (int)Math.Round(fishCall.TimeSeconds * sr); //assume location points to centre of grunt

                    double[] subsample = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 2 * signalBuffer);

                    // calculate an amplitude threshold that is above 95th percentile of amplitudes in the subsample
                    //int[] histogramOfAmplitudes;
                    //double minAmplitude;
                    //double maxAmplitude;
                    //double binWidth;
                    //int window = 70;
                    //int percentile = 90;
                    //Histogram.GetHistogramOfWaveAmplitudes(subsample, window, out histogramOfAmplitudes, out minAmplitude, out maxAmplitude, out binWidth);
                    //int percentileBin = Histogram.GetPercentileBin(histogramOfAmplitudes, percentile);

                    //double amplitudeThreshold = (percentileBin + 1) * binWidth;
                    //if (amplitudeThreshold < minAmplitudeThreshold) amplitudeThreshold = minAmplitudeThreshold;

                    double[] scores1 = AnalyseWaveformAtLocation(subsample, amplitudeThreshold, scoreThreshold);
                    string   title1  = $"scores={fishCall.Timehms}";
                    Image    bmp1    = GraphsAndCharts.DrawGraph(title1, scores1, subsample.Length, 300, 1);

                    //bmp1.Save(path1.FullName);

                    string title2 = $"tStart={fishCall.Timehms}";
                    Image  bmp2   = GraphsAndCharts.DrawWaveform(title2, subsample, 1);
                    var    path1  = subSamplesDirectory.CombineFile($"scoresForTestSubsample_{fishCall.TimeSeconds}secs.png");

                    //var path2 = subSamplesDirectory.CombineFile($@"testSubsample_{times[t]}secs.wav.png");
                    Image[] imageList = { bmp2, bmp1 };
                    Image   bmp3      = ImageTools.CombineImagesVertically(imageList);
                    bmp3.Save(path1.FullName);

                    //write wave form to txt file for later work in XLS
                    //var path3 = subSamplesDirectory.CombineFile($@"testSubsample_{times[t]}secs.wav.csv");
                    //signalBuffer = 800;
                    //double[] subsample2 = DataTools.Subarray(bandPassFilteredSignal, location - signalBuffer, 3 * signalBuffer);
                    //FileTools.WriteArray2File(subsample2, path3.FullName);
                }
            }

            int signalLength = bandPassFilteredSignal.Length;

            // count number of 1000 sample segments
            int blockLength = 1000;
            int blockCount  = signalLength / blockLength;

            int[]    indexOfMax = new int[blockCount];
            double[] maxInBlock = new double[blockCount];

            for (int i = 0; i < blockCount; i++)
            {
                double max        = -2.0;
                int    blockStart = blockLength * i;
                for (int s = 0; s < blockLength; s++)
                {
                    double absValue = Math.Abs(bandPassFilteredSignal[blockStart + s]);
                    if (absValue > max)
                    {
                        max           = absValue;
                        maxInBlock[i] = max;
                        indexOfMax[i] = blockStart + s;
                    }
                }
            }

            // transfer max values to a list
            var indexList = new List <int>();

            for (int i = 1; i < blockCount - 1; i++)
            {
                // only find the blocks that contain a max value that is > neighbouring blocks
                if (maxInBlock[i] > maxInBlock[i - 1] && maxInBlock[i] > maxInBlock[i + 1])
                {
                    indexList.Add(indexOfMax[i]);
                }

                //ALTERNATIVELY
                // look at max in each block
                //indexList.Add(indexOfMax[i]);
            }

            // now process neighbourhood of each max
            int binCount = windowWidth / 2;

            FFT.WindowFunc wf               = FFT.Hamming;
            var            fft              = new FFT(windowWidth, wf);
            int            maxHz            = 1000;
            double         hzPerBin         = nyquist / (double)binCount;
            int            requiredBinCount = (int)Math.Round(maxHz / hzPerBin);

            // init list of events
            List <AcousticEvent> events = new List <AcousticEvent>();

            double[] scores = new double[signalLength]; // init of score array

            int id = 0;

            foreach (int location in indexList)
            {
                //System.LoggedConsole.WriteLine("Location " + location + ", id=" + id);

                int start = location - binCount;
                if (start < 0)
                {
                    continue;
                }

                int end = location + binCount;
                if (end >= signalLength)
                {
                    continue;
                }

                double[] subsampleWav = DataTools.Subarray(bandPassFilteredSignal, start, windowWidth);

                var spectrum = fft.Invoke(subsampleWav);

                // convert to power
                spectrum = DataTools.SquareValues(spectrum);
                spectrum = DataTools.filterMovingAverageOdd(spectrum, 3);
                spectrum = DataTools.normalise(spectrum);
                var subBandSpectrum = DataTools.Subarray(spectrum, 1, requiredBinCount); // ignore DC in bin zero.

                // now do some tests on spectrum to determine if it is a candidate grunt
                bool eventFound = false;

                double[] scoreArray = CalculateScores(subBandSpectrum, windowWidth);
                double   score      = scoreArray[0];

                if (score > scoreThreshold)
                {
                    eventFound = true;
                }

                if (eventFound)
                {
                    for (int i = location - binCount; i < location + binCount; i++)
                    {
                        scores[location] = score;
                    }

                    var    startTime  = TimeSpan.FromSeconds((location - binCount) / (double)sr);
                    string startLabel = startTime.Minutes + "." + startTime.Seconds + "." + startTime.Milliseconds;
                    Image  image4     = GraphsAndCharts.DrawWaveAndFft(subsampleWav, sr, startTime, spectrum, maxHz * 2, scoreArray);

                    var path4 = outputDirectory.CreateSubdirectory("subsamples").CombineFile($@"subsample_{location}_{startLabel}.png");
                    image4.Save(path4.FullName);

                    // have an event, store the data in the AcousticEvent class
                    double duration = 0.2;
                    int    minFreq  = 50;
                    int    maxFreq  = 1000;
                    var    anEvent  = new AcousticEvent(segmentStartOffset, startTime.TotalSeconds, duration, minFreq, maxFreq);
                    anEvent.Name = "grunt";

                    //anEvent.Name = DataTools.WriteArrayAsCsvLine(subBandSpectrum, "f4");
                    anEvent.Score = score;
                    events.Add(anEvent);
                }

                id++;
            }

            // make a spectrogram
            var config = new SonogramConfig
            {
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = configuration.GetDoubleOrNull(AnalysisKeys.NoiseBgThreshold) ?? 0.0,
            };
            var sonogram = (BaseSonogram) new SpectrogramStandard(config, audioRecording.WavReader);

            //// when the value is accessed, the indices are calculated
            //var indices = getSpectralIndexes.Value;

            //// check if the indices have been calculated - you shouldn't actually need this
            //if (getSpectralIndexes.IsValueCreated)
            //{
            //    // then indices have been calculated before
            //}

            var plot = new Plot(this.DisplayName, scores, eventThreshold);

            return(new RecognizerResults()
            {
                Events = events,
                Hits = null,

                //ScoreTrack = null,
                Plots = plot.AsList(),
                Sonogram = sonogram,
            });
        }
        /// <summary>
        /// The CORE ANALYSIS METHOD.
        /// Makes a standard spectrogram of the audio segment, detects harmonic stacks (formants) in a
        /// 64-bin sub-band above MIN_HZ via cross-correlation, scores each frame, transfers the scores
        /// to a hits matrix for display, and converts score peaks to acoustic events.
        /// </summary>
        /// <param name="fiSegmentOfSourceFile">the audio segment (file) to be analysed.</param>
        /// <param name="configDict">analysis parameters keyed by name, e.g. MIN_HZ, MIN_FORMANT_GAP, DECIBEL_THRESHOLD.</param>
        /// <param name="segmentStartOffset">start time of this segment relative to the start of the source recording.</param>
        /// <returns>a tuple of (sonogram, hits matrix, score plot, predicted events, recording duration).</returns>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values - frame length may be overridden from the config dictionary
            int frameLength = 1024;

            if (configDict.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
            }

            double windowOverlap              = 0.0;
            int    minHz                      = int.Parse(configDict["MIN_HZ"]);
            int    minFormantgap              = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int    maxFormantgap              = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double decibelThreshold           = double.Parse(configDict["DECIBEL_THRESHOLD"]);   //dB
            double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double callDuration               = double.Parse(configDict["CALL_DURATION"]);       // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameLength,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            }; //default values config

            TimeSpan tsRecordingtDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;

            // with zero window overlap, frames per second = sr / windowSize, which is numerically
            // the same value as the frequency bin width.
            double   framesPerSecond = freqBinWidth;

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int numberOfBins = 64;
            int minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin       = minBin + numberOfBins - 1;
            int maxHz        = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            // extract the sub-band (minBin..maxbin) in which harmonics are sought
            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            int callSpan = (int)Math.Round(callDuration * framesPerSecond);

            //#############################################################################################################################################
            //ii: DETECT HARMONICS
            var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);

            double[] dBArray     = results.Item1;
            double[] intensity   = results.Item2;   //an array of harmonic intensity scores (0-1)
            double[] periodicity = results.Item3;   //an array of harmonic periods (in bins; converted to Hz below)

            //intensity = DataTools.filterMovingAverage(intensity, 3);
            int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 hz - too much noise

            // score each frame: keep only frames whose harmonic intensity and formant gap pass the thresholds
            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                if (intensity[r] < harmonicIntensityThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find freq having max power and use info to adjust score.
                //expect humans to have max < 1000 Hz
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < noiseBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                int    maxIndex         = DataTools.GetMaxIndex(spectrum);
                int    freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
                double discount         = 1.0;
                if (freqWithMaxPower < 1200)
                {
                    // zero the score when the dominant frequency lies below 1200 Hz
                    discount = 0.0;
                }

                // NOTE(review): this condition is redundant - rows failing it were already skipped by the
                // 'continue' at the top of the loop (apart from exact equality with the threshold).
                if (intensity[r] > harmonicIntensityThreshold)
                {
                    scoreArray[r] = intensity[r] * discount;
                }
            }

            //transfer info to a hits matrix for display purposes.
            var    hits      = new double[rowCount, colCount];
            double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                double herzPeriod = periodicity[r] * freqBinWidth;
                for (int c = minBin; c < maxbin; c++)
                {
                    //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
                    hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap;  //to get a relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxPossibleScore = 0.5;
            int    halfCallSpan     = callSpan / 2;
            var    predictedEvents  = new List <AcousticEvent>();

            for (int i = 0; i < rowCount; i++)
            {
                //assume one score position per crow call
                if (scoreArray[i] < 0.001)
                {
                    continue;
                }

                // centre the event on the score position by backing off half the call span
                double        startTime = (i - halfCallSpan) / framesPerSecond;
                AcousticEvent ev        = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
                ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
                ev.Score           = scoreArray[i];
                ev.ScoreNormalised = ev.Score / maxPossibleScore; // normalised to the user supplied threshold

                //ev.Score_MaxPossible = maxPossibleScore;
                predictedEvents.Add(ev);
            } //for loop

            Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingtDuration));
        } //Analysis()
 /// <summary>
 /// Appends <paramref name="value"/> to the SQL text being built, as a quoted string literal.
 /// Delegates to DataTools.ConvertStringToSql with "||" as the string-concatenation operator
 /// and "'" as the quote character.
 /// </summary>
 static void ConvertStringToSql(StringBuilder stringBuilder, string value)
 {
     DataTools.ConvertStringToSql(stringBuilder, "||", "'", AppendConversion, value);
 }
 /// <summary>
 /// Appends <paramref name="value"/> to the SQL text being built.
 /// Delegates to DataTools.ConvertStringToSql with "||" as the string-concatenation operator,
 /// no quote character, and the additional escape mappings in _extraEscapes
 /// (presumably provider-specific - confirm against the declaring class).
 /// NOTE(review): this has the same signature as the preceding overload; the two cannot coexist
 /// in one type and were presumably pasted here from different provider classes.
 /// </summary>
 static void ConvertStringToSql(StringBuilder stringBuilder, string value)
 {
     DataTools.ConvertStringToSql(stringBuilder, "||", null, AppendConversion, value, _extraEscapes);
 }
        ///// <summary>
        ///// This method merges all files of acoustic indices derived from a sequence of consecutive 6 hour recording,
        ///// that have a total duration of 24 hours. This was necessary to deal with Jason's new regime of doing 24 hour recordings
        ///// in blocks of 6 hours.
        ///// </summary>
        //public static void ConcatenateSpectralIndexFiles1()
        //{
        //    // create an array that contains the names of csv file to be read.
        //    // The file names must be in the temporal order required for the resulting spectrogram image.
        //    string topLevelDirectory = @"C:\SensorNetworks\Output\SERF\SERFIndices_2013April01";
        //    string fileStem = "SERF_20130401";
        //    string[] names = {"SERF_20130401_000025_000",
        //                          "SERF_20130401_064604_000",
        //                          "SERF_20130401_133143_000",
        //                          "SERF_20130401_201721_000",
        //                              };
        //    //string topLevelDirectory = @"C:\SensorNetworks\Output\SERF\SERFIndices_2013June19";
        //    //string fileStem = "SERF_20130619";
        //    //string[] names = {"SERF_20130619_000038_000",
        //    //                  "SERF_20130619_064615_000",
        //    //                  "SERF_20130619_133153_000",
        //    //                  "SERF_20130619_201730_000",
        //    //                      };

        //    // ###############################################################
        //    // VERY IMPORTANT:  MUST MAKE SURE THE BELOW ARE CONSISTENT WITH THE DATA !!!!!!!!!!!!!!!!!!!!
        //    int sampleRate = 17640;
        //    int frameWidth = 256;
        //    // ###############################################################

        //    string[] level2Dirs = {names[0]+".wav",
        //                               names[1]+".wav",
        //                               names[2]+".wav",
        //                               names[3]+".wav",
        //                              };
        //    string level3Dir = "Towsey.Acoustic";
        //    string[] dirNames = {topLevelDirectory+@"\"+level2Dirs[0]+@"\"+level3Dir,
        //                             topLevelDirectory+@"\"+level2Dirs[1]+@"\"+level3Dir,
        //                             topLevelDirectory+@"\"+level2Dirs[2]+@"\"+level3Dir,
        //                             topLevelDirectory+@"\"+level2Dirs[3]+@"\"+level3Dir
        //                            };
        //    string[] fileExtentions = { ".ACI.csv",
        //                                    ".AVG.csv",
        //                                    ".BGN.csv",
        //                                    ".CVR.csv",
        //                                    ".TEN.csv",
        //                                    ".VAR.csv",
        //                                    "_Towsey.Acoustic.Indices.csv"
        //                                  };

        //    // this loop reads in all the Indices from consecutive csv files
        //    foreach (string extention in fileExtentions)
        //    {
        //        Console.WriteLine("\n\nFILE TYPE: " + extention);

        //        List<string> lines = new List<string>();

        //        for (int i = 0; i < dirNames.Length; i++)
        //        {
        //            string fName = names[i] + extention;
        //            string path = Path.Combine(dirNames[i], fName);
        //            var fileInfo = new FileInfo(path);
        //            Console.WriteLine(path);
        //            if (!fileInfo.Exists)
        //                Console.WriteLine("ABOVE FILE DOES NOT EXIST");

        //            var ipLines = FileTools.ReadTextFile(path);
        //            if (i != 0)
        //            {
        //                ipLines.RemoveAt(0); //remove the first line
        //            }
        //            lines.AddRange(ipLines);
        //        }
        //        string opFileName = fileStem + extention;
        //        string opPath = Path.Combine(topLevelDirectory, opFileName);
        //        FileTools.WriteTextFile(opPath, lines, false);

        //    } //end of all file extentions

        //    TimeSpan minuteOffset = TimeSpan.Zero; // assume recordings start at midnight
        //    TimeSpan xScale = TimeSpan.FromMinutes(60);
        //    double backgroundFilterCoeff = SpectrogramConstants.BACKGROUND_FILTER_COEFF;
        //    string colorMap = SpectrogramConstants.RGBMap_ACI_ENT_CVR;
        //    var cs1 = new LDSpectrogramRGB(minuteOffset, xScale, sampleRate, frameWidth, colorMap);
        //    cs1.BaseName = fileStem;
        //    cs1.ColorMode = colorMap;
        //    cs1.BackgroundFilter = backgroundFilterCoeff;
        //    var dirInfo = new DirectoryInfo(topLevelDirectory);
        //    cs1.ReadSpectralIndices(dirInfo, fileStem); // reads all known indices files
        //    if (cs1.GetCountOfSpectrogramMatrices() == 0)
        //    {
        //        Console.WriteLine("There are no spectrogram matrices in the dictionary.");
        //        return;
        //    }
        //    cs1.DrawGreyScaleSpectrograms(dirInfo, fileStem);

        //    colorMap = SpectrogramConstants.RGBMap_ACI_ENT_CVR;
        //    Image image1 = cs1.DrawFalseColourSpectrogram("NEGATIVE", colorMap);

        //    int nyquist = cs1.SampleRate / 2;
        //    int herzInterval = 1000;

        //    string title = String.Format("FALSE-COLOUR SPECTROGRAM: {0}      (scale:hours x kHz)       (colour: R-G-B={1})", fileStem, colorMap);
        //    Image titleBar = LDSpectrogramRGB.DrawTitleBarOfFalseColourSpectrogram(title, image1.Width);
        //    image1 = LDSpectrogramRGB.FrameLDSpectrogram(image1, titleBar, minuteOffset, cs1.IndexCalculationDuration, cs1.XTicInterval, nyquist, herzInterval);
        //    image1.Save(Path.Combine(dirInfo.FullName, fileStem + "." + colorMap + ".png"));

        //    colorMap = "BGN-AVG-VAR";
        //    Image image2 = cs1.DrawFalseColourSpectrogram("NEGATIVE", colorMap);
        //    title = String.Format("FALSE-COLOUR SPECTROGRAM: {0}      (scale:hours x kHz)       (colour: R-G-B={1})", fileStem, colorMap);
        //    titleBar = LDSpectrogramRGB.DrawTitleBarOfFalseColourSpectrogram(title, image2.Width);
        //    image2 = LDSpectrogramRGB.FrameLDSpectrogram(image2, titleBar, minuteOffset, cs1.IndexCalculationDuration, cs1.XTicInterval, nyquist, herzInterval);
        //    image2.Save(Path.Combine(dirInfo.FullName, fileStem + "." + colorMap + ".png"));
        //    Image[] array = new Image[2];
        //    array[0] = image1;
        //    array[1] = image2;
        //    Image image3 = ImageTools.CombineImagesVertically(array);
        //    image3.Save(Path.Combine(dirInfo.FullName, fileStem + ".2MAPS.png"));
        //}

        /// <summary>
        /// This method rearranges the content of a false-colour spectrogram according to the acoustic cluster or acoustic state to which each minute belongs.
        /// The time scale is added in afterwards - must overwrite the previous time scale and title bar.
        /// This method was written to examine the cluster content of recordings analysed by Mangalam using a 10x10 SOM.
        /// The output image was used in the paper presented by Mangalam to BDVA2015 in Tasmania. (Big data, visual analytics).
        /// NOTE(review): all input/output paths are hard-coded to a local machine.
        /// </summary>
        public static void ExtractSOMClusters1()
        {
            string opDir = @"C:\SensorNetworks\Output\Mangalam_BDVA2015\";

            //string fileStem = @"BYR2_20131016";
            //string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_BDVA2015\BYR2_20131016.ACI-ENT-EVN.png";
            //string clusterFile = opDir + "SE 13 Oct - Cluster-node list.csv";

            //string fileStem = @"BYR2_20131017";
            //string inputImagePath = opDir + fileStem + ".ACI-ENT-EVN.png";
            //string clusterFile = opDir + "BY2-17Oct - node_clus_map.csv";

            string fileStem       = @"SERF-SE_20101013";
            string inputImagePath = @"C:\SensorNetworks\Output\Mangalam_BDVA2015\SERF-SE_20101013.ACI-ENT-EVN.png";
            string clusterFile    = opDir + "SE 13 Oct - Cluster-node list.csv";

            string opFileName = fileStem + ".SOMClusters.png";

            int clusterCount = 27;  // from fuzzy c-clustering
            int nodeCount    = 100; // from the 10x10 SOM

            // pens come from a shared palette; ownership is unclear, so they are not disposed here.
            List <Pen> pens = ImageTools.GetColorPalette(clusterCount);

            // ###############################################################
            // VERY IMPORTANT:  MUST MAKE SURE THE BELOW ARE CONSISTENT WITH THE DATA !!!!!!!!!!!!!!!!!!!!
            // NOTE(review): several of these settings are not referenced by the active code below; they
            // belonged to the (since removed) spectrogram-framing step and are retained for reference.
            int      sampleRate            = 22050;
            int      frameWidth            = 256;
            int      nyquist               = sampleRate / 2;
            int      herzInterval          = 1000;
            TimeSpan minuteOffset          = TimeSpan.Zero; // assume recordings start at midnight
            double   backgroundFilterCoeff = SpectrogramConstants.BACKGROUND_FILTER_COEFF;
            string   colorMap              = SpectrogramConstants.RGBMap_ACI_ENT_EVN;
            string   title = string.Format("SOM CLUSTERS of ACOUSTIC INDICES: recording {0}", fileStem);
            TimeSpan indexCalculationDuration = TimeSpan.FromSeconds(60); // seconds
            TimeSpan xTicInterval             = TimeSpan.FromMinutes(60); // 60 minutes or one hour.
            int      trackheight = 20;

            // ###############################################################

            // cluster number maps to a display label; clusters beyond 26 wrap to lower case.
            string[] clusterLabel = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a" };

            // read the cluster assignment file: one CSV line per minute; node id in column 1, cluster id in column 2 (both 1-based).
            List <string> lines     = FileTools.ReadTextFile(clusterFile);
            int           lineCount = lines.Count;

            int[] clusterHistogram = new int[clusterCount];

            // read in the source false-colour spectrogram image
            FileInfo fi = new FileInfo(inputImagePath);

            if (!fi.Exists)
            {
                Console.WriteLine("\n\n >>>>>>>> FILE DOES NOT EXIST >>>>>>: " + fi.Name);
            }

            Console.WriteLine("Reading file: " + fi.Name);

            // GDI+ objects are IDisposable; the original leaked all of them.
            using (Font stringFont = new Font("Arial", 12, FontStyle.Bold))
            using (Pen whitePen = new Pen(Color.White))
            using (Pen blackPen = new Pen(Color.Black))
            using (Bitmap ipImage = ImageTools.ReadImage2Bitmap(fi.FullName))
            {
                int imageWidth = ipImage.Width;
                int imageHt    = ipImage.Height;

                // init the output image
                using (Bitmap opImage = new Bitmap(imageWidth, imageHt))
                using (Graphics gr = Graphics.FromImage(opImage))
                {
                    gr.Clear(Color.Black);

                    // construct cluster histogram (cluster ids in the file are 1-based)
                    for (int lineNumber = 0; lineNumber < lineCount; lineNumber++)
                    {
                        string[] words     = lines[lineNumber].Split(',');
                        int      clusterID = int.Parse(words[2]);
                        clusterHistogram[clusterID - 1]++;
                    }

                    // rank cluster counts in descending order
                    Tuple <int[], int[]> tuple = DataTools.SortArray(clusterHistogram);

                    int[] sortOrder = tuple.Item1;

                    // this loop rearranges the image columns so that minutes belonging to the same cluster
                    // are adjacent, processing clusters in descending order of size.
                    int opColumn           = 0;
                    int clusterStartColumn = 0;

                    for (int id = 0; id < clusterCount; id++)
                    {
                        int sortID = sortOrder[id];

                        // create node array to store column images for this cluster
                        List <Bitmap>[] nodeArray = new List <Bitmap> [nodeCount];
                        for (int n = 0; n < nodeCount; n++)
                        {
                            nodeArray[n] = new List <Bitmap>();
                        }

                        Console.WriteLine("Reading CLUSTER: " + (sortID + 1) + "  Label=" + clusterLabel[sortID]);

                        // read through the entire list of minutes and collect the one-pixel-wide columns belonging to this cluster
                        for (int lineNumber = 0; lineNumber < lineCount; lineNumber++)
                        {
                            if (lineNumber == 0)
                            {
                                // remember where this cluster's band of columns starts, for centring its label later
                                clusterStartColumn = opColumn;
                            }

                            string[] words     = lines[lineNumber].Split(',');
                            int      clusterID = int.Parse(words[2]) - 1; // -1 because matlab arrays start at 1.
                            int      nodeID    = int.Parse(words[1]) - 1;
                            if (clusterID == sortID)
                            {
                                // get image column
                                Rectangle rectangle = new Rectangle(lineNumber, 0, 1, imageHt);
                                Bitmap    column    = ipImage.Clone(rectangle, ipImage.PixelFormat);

                                nodeArray[nodeID].Add(column);
                            }
                        }

                        // cycle through the nodes and draw the column images in order of node number
                        for (int n = 0; n < nodeCount; n++)
                        {
                            int imageCount = nodeArray[n].Count;
                            if (imageCount == 0)
                            {
                                continue;
                            }

                            for (int i = 0; i < imageCount; i++)
                            {
                                // dispose each cloned column once drawn - the original leaked one bitmap per minute
                                using (Bitmap column = nodeArray[n][i])
                                {
                                    gr.DrawImage(column, opColumn, 0);
                                    gr.DrawLine(pens[id], opColumn, trackheight, opColumn, trackheight + trackheight);
                                    gr.DrawLine(pens[id], opColumn, imageHt - trackheight, opColumn, imageHt);
                                    opColumn++;
                                }
                            }

                            //gr.DrawLine(blackPen, opColumn - 1, imageHt - trackheight, opColumn - 1, imageHt - 10);
                        }

                        if (id >= clusterCount - 1)
                        {
                            // last cluster: no trailing separator line or label
                            break;
                        }

                        // draw a vertical separator at the right edge of this cluster's band
                        // (an accidentally duplicated blackPen DrawLine in the original has been removed)
                        gr.DrawLine(whitePen, opColumn - 1, 0, opColumn - 1, imageHt - trackheight - 1);
                        gr.DrawLine(blackPen, opColumn - 1, imageHt - trackheight, opColumn - 1, imageHt);

                        // centre the cluster label within the cluster's band of columns
                        int location = opColumn - ((opColumn - clusterStartColumn) / 2);
                        gr.DrawString(clusterLabel[sortID], stringFont, Brushes.Black, new PointF(location - 10, imageHt - 19));
                    }

                    // draw the title bar over the top of the rearranged image
                    using (Image titleBar = DrawTitleBarOfClusterSpectrogram(title, imageWidth))
                    {
                        gr.DrawImage(titleBar, 0, 0);
                    }

                    opImage.Save(Path.Combine(opDir, opFileName));
                }
            }
        }
Exemple #24
0
        /// <summary>
        /// Runs the unsupervised feature-learning pipeline: learns cluster centroids from one-minute
        /// recordings, writes the centroids and cluster sizes to CSV files, draws an image of the
        /// clusters for each frequency band, and finally extracts features from the training set
        /// using the learned centroids.
        /// NOTE(review): all input/output paths are hard-coded to a local machine.
        /// </summary>
        /// <param name="arguments">command-line arguments (not referenced by this method).</param>
        public void Execute(Arguments arguments)
        {
            LoggedConsole.WriteLine("feature learning process...");

            var inputDir     = @"D:\Mahnoosh\Liz\Least_Bittern\";
            var inputPath    = Path.Combine(inputDir, "TrainSet\\one_min_recordings");
            var trainSetPath = Path.Combine(inputDir, "TrainSet\\train_data");
            // var testSetPath = Path.Combine(inputDir, "TestSet");
            var configPath = @"D:\Mahnoosh\Liz\Least_Bittern\FeatureLearningConfig.yml";
            var resultDir  = Path.Combine(inputDir, "FeatureLearning");

            Directory.CreateDirectory(resultDir);

            // var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
            // var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
            // var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
            // var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
            // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");

            // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1-min recordings

            var configFile = configPath.ToFileInfo();

            // NOTE(review): configPath is a non-null literal, so ToFileInfo presumably never returns
            // null here - this check is defensive; confirm against the ToFileInfo extension.
            if (configFile == null)
            {
                throw new FileNotFoundException("No config file argument provided");
            }
            else if (!configFile.Exists)
            {
                throw new ArgumentException($"Config file {configFile.FullName} not found");
            }

            var configuration = ConfigFile.Deserialize <FeatureLearningSettings>(configFile);

            // width (in frequency bins) of each patch, derived from the configured frequency range and band count
            int patchWidth    =
                (configuration.MaxFreqBin - configuration.MinFreqBin + 1) / configuration.NumFreqBand;

            var clusteringOutputList = FeatureLearning.UnsupervisedFeatureLearning(configuration, inputPath);

            // centroids of all frequency bands, accumulated for the feature-extraction step below
            List <double[][]> allBandsCentroids = new List <double[][]>();

            // one clustering output per frequency band
            for (int i = 0; i < clusteringOutputList.Count; i++)
            {
                var clusteringOutput = clusteringOutputList[i];

                // writing centroids to a csv file
                // note that Csv.WriteToCsv can't write data types like dictionary<int, double[]> (problems with arrays)
                // I converted the dictionary values to a matrix and used the Csv.WriteMatrixToCsv
                // it might be a better way to do this
                string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
                var    clusterCentroids     = clusteringOutput.ClusterIdCentroid.Values.ToArray();
                Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());

                // sorting clusters based on size and output it to a csv file
                Dictionary <int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
                int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);

                // Write cluster ID and size to a CSV file
                string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv");
                Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

                // Draw cluster image directly from clustering output
                List <KeyValuePair <int, double[]> > list = clusteringOutput.ClusterIdCentroid.ToList();
                double[][] centroids = new double[list.Count][];

                for (int j = 0; j < list.Count; j++)
                {
                    centroids[j] = list[j].Value;
                }

                allBandsCentroids.Add(centroids);

                List <double[, ]> allCentroids = new List <double[, ]>();
                for (int k = 0; k < centroids.Length; k++)
                {
                    // convert each centroid to a matrix in order of cluster ID
                    // double[,] cent = PatchSampling.ArrayToMatrixByColumn(centroids[i], patchWidth, patchHeight);
                    // OR: in order of cluster size
                    double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, configuration.PatchHeight);

                    // normalize each centroid
                    double[,] normCent = DataTools.normalise(cent);

                    // add a row of zero to each centroid (acts as a visual divider in the merged image)
                    double[,] cent2 = PatchSampling.AddRow(normCent);

                    allCentroids.Add(cent2);
                }

                // concatenate all centroids
                double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);

                // Draw clusters
                var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
                clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
                var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");
                clusterImage.Save(outputClusteringImage);
            }

            // extracting features
            FeatureExtraction.UnsupervisedFeatureExtraction(configuration, allBandsCentroids, trainSetPath, resultDir);
            LoggedConsole.WriteLine("Done...");
        }
Exemple #25
0
        /// <summary>
        /// Ad-hoc test harness for methods in class FileTools.
        /// NOTE(review): the first statement throws unconditionally, so everything below it is
        /// unreachable (compiler warning CS0162). The throw was presumably added to stop this
        /// legacy test code running in production; to use a test, remove the guard and set the
        /// corresponding doitN flag to true.
        /// </summary>
        private static void Main()
        {
            // deliberate guard: abort immediately so the legacy tests below never run
            throw new NotSupportedException("THIS WILL FAIL IN PRODUCTION");
            Log.WriteLine("TESTING METHODS IN CLASS FileTools\n\n");

            bool doit1 = false;

            if (doit1) //test ReadTextFile(string fName)
            {
                string fName = testDir + "testTextFile.txt";
                var    array = ReadTextFile(fName);
                foreach (string line in array)
                {
                    LoggedConsole.WriteLine(line);
                }
            }//end test ReadTextFile(string fName)

            bool doit2 = false;

            if (doit2) //test WriteTextFile(string fName)
            {
                string fName = testDir + "testOfWritingATextFile.txt";
                var    array = new List <string>();
                array.Add("string1");
                array.Add("string2");
                array.Add("string3");
                array.Add("string4");
                array.Add("string5");
                WriteTextFile(fName, array);
            }//end test WriteTextFile(string fName)

            bool doit3 = false;

            if (doit3) //test ReadDoubles2Matrix(string fName)
            {
                string fName = testDir + "testOfReadingMatrixFile.txt";
                double[,] matrix = ReadDoubles2Matrix(fName);
                int rowCount = matrix.GetLength(0); //height
                int colCount = matrix.GetLength(1); //width

                //LoggedConsole.WriteLine("rowCount=" + rowCount + "  colCount=" + colCount);
                DataTools.writeMatrix(matrix);
            }//end test ReadDoubles2Matrix(string fName)

            bool doit4 = true;

            if (doit4) //test WriteMatrix2File(matrix, fName)
            {
                string fName = testDir + "testWriteOfMatrix2File.txt";
                double[,] matrix =
                {
                    {
                        0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
                    },
                    {
                        0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
                    },
                    {
                        0.9, 1.0, 1.1, 1.2, 1.3, 1.4,
                    },
                };
                WriteMatrix2File(matrix, fName);
                LoggedConsole.WriteLine("Wrote following matrix to file " + fName);
                DataTools.writeMatrix(matrix);
            }//end test Method(string fName)

            //COPY THIS TEST TEMPLATE
            bool doit5 = false;

            if (doit5) //test Method(parameters)
            {
            }//end test Method(string fName)

            Log.WriteLine("\nFINISHED");    //end
            Log.WriteLine("CLOSE CONSOLE"); //end
        } //end MAIN
        /// <summary>
        /// Frames the signal and computes, per frame, a set of envelope statistics plus the FFT
        /// amplitude spectrum; returns everything encapsulated in class EnvelopeAndFft:
        /// 1) the minimum and maximum signal values
        /// 2) the average of absolute amplitudes for each frame
        /// 3) the minimum and maximum value in each frame
        /// 4) the signal envelope as vector, i.e. the maximum of absolute amplitudes for each frame
        /// 5) vector of frame energies (and the same in decibels)
        /// 6) the high amplitude and clipping counts
        /// 7) the signal amplitude spectrogram (DC column removed)
        /// 8) the power of the FFT Window, i.e. sum of squared window values
        /// 9) the nyquist, the width of a freq bin in Hz, and the Nyquist bin ID
        /// The returned info is used by Sonogram classes to draw sonograms and by Spectral Indices
        /// classes to calculate Spectral indices. Less than half the info is used to draw sonograms
        /// but it is difficult to disentangle calculation of all the info without reverting back to
        /// the old days when we used two classes and making sure they remain in synch.
        /// </summary>
        /// <param name="signal">the raw audio samples.</param>
        /// <param name="sampleRate">samples per second; used for duration and frequency-scale info.</param>
        /// <param name="epsilon">smallest non-zero sample value; passed on to clipping detection and the result.</param>
        /// <param name="frameSize">number of samples per analysis frame (also the FFT window size).</param>
        /// <param name="frameStep">hop in samples between consecutive frames.</param>
        /// <param name="windowName">FFT window function name; defaults to Hamming when null.</param>
        public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram(
            double[] signal,
            int sampleRate,
            double epsilon,
            int frameSize,
            int frameStep,
            string windowName = null)
        {
            // SIGNAL PRE-EMPHASIS helps with speech signals
            // Do not use this for environmental audio
            //if (config.DoPreemphasis)
            //{
            //    signal = DSP_Filters.PreEmphasis(signal, 0.96);
            //}

            int[,] frameIDs = FrameStartEnds(signal.Length, frameSize, frameStep);
            if (frameIDs == null)
            {
                throw new NullReferenceException("Thrown in EnvelopeAndFft.ExtractEnvelopeAndAmplSpectrogram(): int matrix, frameIDs, cannot be null.");
            }

            int frameCount = frameIDs.GetLength(0);

            // set up the FFT parameters
            if (windowName == null)
            {
                windowName = FFT.KeyHammingWindow;
            }

            FFT.WindowFunc w   = FFT.GetWindowFunction(windowName);
            var            fft = new FFT(frameSize, w);                     // init class which calculates the Matlab compatible .NET FFT

            double[,] spectrogram = new double[frameCount, fft.CoeffCount]; // init amplitude sonogram
            double minSignalValue = double.MaxValue;
            double maxSignalValue = double.MinValue;

            double[] average       = new double[frameCount];
            double[] minValues     = new double[frameCount];
            double[] maxValues     = new double[frameCount];
            double[] envelope      = new double[frameCount];
            double[] frameEnergy   = new double[frameCount];
            double[] frameDecibels = new double[frameCount];

            // for all frames
            for (int i = 0; i < frameCount; i++)
            {
                int start = i * frameStep;
                int end   = start + frameSize;

                // get average and envelope for current frame
                double frameMin    = signal[start];
                double frameMax    = signal[start];
                double frameSum    = signal[start];
                double total       = Math.Abs(signal[start]);
                double maxAbsValue = total;

                // FIX: include the first sample's square; previously energy started at zero and the
                // inner loop began at start+1, so every frame's energy was biased low by one sample.
                double energy = signal[start] * signal[start];

                // FIX: fold the first sample of the frame into the global min/max;
                // previously signal[0] was never compared against min/maxSignalValue.
                if (signal[start] > maxSignalValue)
                {
                    maxSignalValue = signal[start];
                }

                if (signal[start] < minSignalValue)
                {
                    minSignalValue = signal[start];
                }

                // for all remaining values in frame
                for (int x = start + 1; x < end; x++)
                {
                    if (signal[x] > maxSignalValue)
                    {
                        maxSignalValue = signal[x];
                    }

                    if (signal[x] < minSignalValue)
                    {
                        minSignalValue = signal[x];
                    }

                    frameSum += signal[x];

                    // Get frame min and max
                    if (signal[x] < frameMin)
                    {
                        frameMin = signal[x];
                    }

                    if (signal[x] > frameMax)
                    {
                        frameMax = signal[x];
                    }

                    energy += signal[x] * signal[x];

                    // Get absolute signal average in current frame
                    double absValue = Math.Abs(signal[x]);
                    total += absValue;

                    // Get the maximum absolute signal value in current frame
                    if (absValue > maxAbsValue)
                    {
                        maxAbsValue = absValue;
                    }
                } // end of frame

                double frameDc = frameSum / frameSize;
                minValues[i]     = frameMin;
                maxValues[i]     = frameMax;
                average[i]       = total / frameSize;
                envelope[i]      = maxAbsValue;
                frameEnergy[i]   = energy / frameSize;

                // NOTE(review): an all-zero frame yields Log10(0) = -Infinity here;
                // confirm downstream consumers tolerate that, or clamp with epsilon.
                frameDecibels[i] = 10 * Math.Log10(frameEnergy[i]);

                // remove DC value from signal values
                double[] signalMinusAv = new double[frameSize];
                for (int j = 0; j < frameSize; j++)
                {
                    signalMinusAv[j] = signal[start + j] - frameDc;
                }

                // generate the spectra of FFT AMPLITUDES - NOTE: f[0]=DC;  f[64]=Nyquist
                var f1 = fft.InvokeDotNetFFT(signalMinusAv);

                // Previous alternative call to do the FFT and return amplitude spectrum
                //f1 = fft.Invoke(window);

                // Smooth spectrum to reduce variance
                // In the early days (pre-2010), we used to smooth the spectra to reduce sonogram variance. This is statistically correct thing to do.
                // Later, we stopped this for standard sonograms but kept it for calculating acoustic indices.
                // As of 28 March 2017, we are merging the two codes and keeping spectrum smoothing.
                // Will need to check the effect on spectrograms.
                int smoothingWindow = 3;
                f1 = DataTools.filterMovingAverage(f1, smoothingWindow);

                // transfer amplitude spectrum to spectrogram matrix
                for (int j = 0; j < fft.CoeffCount; j++)
                {
                    spectrogram[i, j] = f1[j];
                }
            } // end frames

            // Remove the DC column ie column zero from amplitude spectrogram.
            double[,] amplitudeSpectrogram = MatrixTools.Submatrix(spectrogram, 0, 1, spectrogram.GetLength(0) - 1, spectrogram.GetLength(1) - 1);

            // check the envelope for clipping. Accept a clip if two consecutive frames have max value = 1,0
            Clipping.GetClippingCount(signal, envelope, frameStep, epsilon, out int highAmplitudeCount, out int clipCount);

            // get SNR data
            var snrData = new SNR(signal, frameIDs);

            return(new EnvelopeAndFft
            {
                // The following data is required when constructing sonograms
                Duration = TimeSpan.FromSeconds((double)signal.Length / sampleRate),
                Epsilon = epsilon,
                SampleRate = sampleRate,
                FrameCount = frameCount,
                FractionOfHighEnergyFrames = snrData.FractionOfHighEnergyFrames,
                WindowPower = fft.WindowPower,
                AmplitudeSpectrogram = amplitudeSpectrogram,

                // The below 11 variables are only used when calculating spectral and summary indices
                // energy level information
                ClipCount = clipCount,
                HighAmplitudeCount = highAmplitudeCount,
                MinSignalValue = minSignalValue,
                MaxSignalValue = maxSignalValue,

                // envelope info
                Average = average,
                MinFrameValues = minValues,
                MaxFrameValues = maxValues,
                Envelope = envelope,
                FrameEnergy = frameEnergy,
                FrameDecibels = frameDecibels,

                // freq scale info
                NyquistFreq = sampleRate / 2,
                NyquistBin = amplitudeSpectrogram.GetLength(1) - 1,
                FreqBinWidth = sampleRate / (double)amplitudeSpectrogram.GetLength(1) / 2,
            });
        }
 /// <summary>
 /// Appends the SQL representation of <paramref name="value"/> to the builder,
 /// delegating to the shared DataTools helper with a single-quote delimiter.
 /// </summary>
 static void ConvertCharToSql(StringBuilder stringBuilder, char value) =>
     DataTools.ConvertCharToSql(stringBuilder, "'", AppendConversionAction, value);
        /// <summary>
        /// This method does the work for one profile: converts the segment to a spectrogram,
        /// scores the average intensity of the target frequency band, and converts score maxima
        /// into acoustic events which are then filtered by spectral profile.
        /// </summary>
        /// <param name="audioRecording">the recording.</param>
        /// <param name="configuration">the config file.</param>
        /// <param name="profileName">name of the call/event type.</param>
        /// <param name="segmentStartOffset">where one segment is located in the total recording.</param>
        /// <returns>a list of events.</returns>
        private static RecognizerResults TerritorialCall(AudioRecording audioRecording, Config configuration, string profileName, TimeSpan segmentStartOffset)
        {
            ConfigFile.TryGetProfile(configuration, profileName, out var profile);

            // Common properties, with fall-back defaults.
            var speciesName = configuration[AnalysisKeys.SpeciesName] ?? "Pteropus species";
            var abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "Pteropus";

            // The following parameters worked well on a ten minute recording containing 14-16 calls.
            // Note: if you lower the dB threshold, you need to increase maxDurationSeconds
            var bandMinHz = profile.GetIntOrNull(AnalysisKeys.MinHz) ?? 800;
            var bandMaxHz = profile.GetIntOrNull(AnalysisKeys.MaxHz) ?? 8000;
            var shortestEventSeconds = profile.GetDoubleOrNull(AnalysisKeys.MinDuration) ?? 0.15;
            var longestEventSeconds = profile.GetDoubleOrNull(AnalysisKeys.MaxDuration) ?? 0.5;
            var thresholdDb = profile.GetDoubleOrNull(AnalysisKeys.DecibelThreshold) ?? 9.0;

            //######################
            //2. Convert the segment to a spectrogram and average intensity over the target band.
            var sonogram = GetSonogram(configuration, audioRecording);
            var decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, bandMinHz, bandMaxHz, sonogram.NyquistFrequency);

            // Prepare a normalised plot of the band intensity for the output image.
            var normalisationCeiling = 3 * thresholdDb;
            var plots = new List<Plot>
            {
                new Plot(
                    speciesName + " Territory",
                    DataTools.NormaliseInZeroOne(decibelArray, 0, normalisationCeiling),
                    thresholdDb / normalisationCeiling),
            };

            // Convert the decibel scores to acoustic events around the local maxima.
            var acousticEvents = AcousticEvent.GetEventsAroundMaxima(
                decibelArray,
                segmentStartOffset,
                bandMinHz,
                bandMaxHz,
                thresholdDb,
                TimeSpan.FromSeconds(shortestEventSeconds),
                TimeSpan.FromSeconds(longestEventSeconds),
                sonogram.FramesPerSecond,
                sonogram.FBinWidth);

            // Stamp each event with provenance metadata.
            foreach (var ae in acousticEvents)
            {
                ae.FileName = audioRecording.BaseName;
                ae.SpeciesName = speciesName;
                ae.Name = abbreviatedSpeciesName + profileName;
                ae.Profile = profileName;
                ae.SegmentDurationSeconds = audioRecording.Duration.TotalSeconds;
                ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
            }

            acousticEvents = FilterEventsForSpectralProfile(acousticEvents, sonogram);

            return new RecognizerResults()
            {
                Events = acousticEvents,
                Hits = null,
                ScoreTrack = null,
                Plots = plots,
                Sonogram = sonogram,
            };
        }
Exemple #29
0
 /// <summary>
 /// Matrix overload: flattens the matrix to a vector and delegates to the
 /// one-dimensional overload of the same name.
 /// </summary>
 public static SpectralStats GetModeAndOneTailedStandardDeviation(double[,] matrix, int binCount, int upperPercentile) =>
     GetModeAndOneTailedStandardDeviation(DataTools.Matrix2Array(matrix), binCount, upperPercentile);
Exemple #30
0
        }//Execute

        /// <summary>
        /// Template matching via Sobel edge maps: converts the target exemplar to an edge image,
        /// then slides a same-sized window through each candidate segment of the sonogram and
        /// scores each frame position by the dot product of the two unit-length edge vectors.
        /// </summary>
        /// <param name="target">the exemplar spectrogram patch (frames x freq bins).</param>
        /// <param name="dynamicRange">dB range applied to both target and candidate windows before edge detection.</param>
        /// <param name="sonogram">the spectrogram to search.</param>
        /// <param name="segments">candidate acoustic events to search within; null aborts.</param>
        /// <param name="minHz">low edge of the search band in Hz.</param>
        /// <param name="maxHz">high edge of the search band in Hz.</param>
        /// <param name="minDuration">unused in this method body; presumably kept for signature parity with sibling Execute_* methods — confirm.</param>
        /// <returns>a 1-tuple holding one similarity score per sonogram frame.</returns>
        public static Tuple <double[]> Execute_SobelEdges(double[,] target, double dynamicRange, SpectrogramStandard sonogram,
                                                          List <AcousticEvent> segments, int minHz, int maxHz, double minDuration)
        {
            Log.WriteLine("SEARCHING FOR EVENTS LIKE TARGET.");
            if (segments == null)
            {
                return(null);
            }

            // Convert the Hz band to sonogram bin indices and note the target length in frames.
            int minBin       = (int)(minHz / sonogram.FBinWidth);
            int maxBin       = (int)(maxHz / sonogram.FBinWidth);
            int targetLength = target.GetLength(0);

            //adjust target's dynamic range to that set by user
            target = SNR.SetDynamicRange(target, 3.0, dynamicRange); //set event's dynamic range
            double[,] edgeTarget = ImageTools.SobelEdgeDetection(target, 0.4);
            // Flatten the target edge map to a unit vector; candidate windows get the same treatment
            // so the dot product below acts as a normalised cross-correlation score.
            double[] v1 = DataTools.Matrix2Array(edgeTarget);
            v1 = DataTools.normalise2UnitLength(v1);

            //string imagePath2 =  @"C:\SensorNetworks\Output\FELT_Currawong\edgeTarget.png";
            //var image = BaseSonogram.Data2ImageData(edgeTarget);
            //ImageTools.DrawMatrix(image, 1, 1, imagePath2);

            double[] scores = new double[sonogram.FrameCount];
            foreach (AcousticEvent av in segments)
            {
                Log.WriteLine("SEARCHING SEGMENT.");
                // Convert the event's start/end times to frame (row) indices.
                int startRow = (int)Math.Round(av.TimeStart * sonogram.FramesPerSecond);
                int endRow   = (int)Math.Round(av.TimeEnd * sonogram.FramesPerSecond);
                if (endRow >= sonogram.FrameCount)
                {
                    // endRow is only used as an exclusive bound below, so clamping to FrameCount is safe.
                    endRow = sonogram.FrameCount;
                }

                // Last row at which a full target-length window still fits inside the event.
                int stopRow = endRow - targetLength;
                if (stopRow <= startRow)
                {
                    // NOTE(review): this forces at least one comparison even when the event is shorter
                    // than the target; the Submatrix call below then reads rows past endRow and could
                    // exceed the sonogram's last row if the event sits near the end — confirm that
                    // DataTools.Submatrix tolerates this or that callers guarantee headroom.
                    stopRow = startRow + 1;  //want minimum of one row
                }

                for (int r = startRow; r < stopRow; r++)
                {
                    // Extract the candidate window, match its dynamic range to the target's,
                    // and compute its Sobel edge map.
                    double[,] matrix     = DataTools.Submatrix(sonogram.Data, r, minBin, r + targetLength - 1, maxBin);
                    matrix               = SNR.SetDynamicRange(matrix, 3.0, dynamicRange); //set event's dynamic range
                    double[,] edgeMatrix = ImageTools.SobelEdgeDetection(matrix, 0.4);

                    //string imagePath2 = @"C:\SensorNetworks\Output\FELT_Gecko\compare.png";
                    //var image = BaseSonogram.Data2ImageData(matrix);
                    //ImageTools.DrawMatrix(image, 1, 1, imagePath2);

                    // Cosine similarity between unit vectors = plain dot product.
                    double[] v2 = DataTools.Matrix2Array(edgeMatrix);
                    v2 = DataTools.normalise2UnitLength(v2);
                    double crossCor = DataTools.DotProduct(v1, v2);
                    scores[r] = crossCor;

                    //Log.WriteLine("row={0}\t{1:f10}", r, crossCor);
                } //end of rows in segment

                // Pad the tail of the segment with the last computed score so the score
                // track has no artificial drop at the event boundary.
                for (int r = stopRow; r < endRow; r++)
                {
                    scores[r] = scores[stopRow - 1]; //fill in end of segment
                }
            } //foreach (AcousticEvent av in segments)

            var tuple = Tuple.Create(scores);

            return(tuple);
        }//Execute