Code example #1
        } //Analysis()

        public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>> DetectHarmonics(
            AudioRecording recording,
            double intensityThreshold,
            int minHz,
            int minFormantgap,
            int maxFormantgap,
            double minDuration,
            int windowSize,
            double windowOverlap,
            TimeSpan segmentStartOffset)
        {
            //i: MAKE SONOGRAM
            int numberOfBins = 32;
            double binWidth = recording.SampleRate / (double)windowSize;
            int sr = recording.SampleRate;
            double frameDuration = windowSize / (double)sr; // Duration of full frame or window in seconds
            double frameOffset = frameDuration * (1 - windowOverlap); //seconds between starts of consecutive frames
            double framesPerSecond = 1 / frameOffset;

            //double framesPerSecond = sr / (double)windowSize;
            //int frameOffset = (int)(windowSize * (1 - overlap));
            //int frameCount = (length - windowSize + frameOffset) / frameOffset;

            double epsilon = Math.Pow(0.5, recording.BitsPerSample - 1);
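            //epsilon = 2^-(bitsPerSample-1), i.e. one quantisation step of a signal normalised to +/-1.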
            var results2 = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(
                recording.WavReader.Samples,
                sr,
                epsilon,
                windowSize,
                windowOverlap);
            double[] avAbsolute = results2.Average; //average absolute value over the minute recording

            //double[] envelope = results2.Item2;
            //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            double[,] matrix = results2.AmplitudeSpectrogram;
            double windowPower = results2.WindowPower;

            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires the number of bins to scan to be a power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
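            //e.g. (assumed values) sr=17640 and window=2048 give binWidth ~8.6 Hz, so the 32 scanned bins span ~276 Hz above minHz.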
            int minBin = (int)Math.Round(minHz / binWidth);
            int maxHz = (int)Math.Round(minHz + (numberOfBins * binWidth));

            int rowCount = matrix.GetLength(0);
            int colCount = matrix.GetLength(1);
            int maxbin = minBin + numberOfBins;
            double[,] subMatrix = MatrixTools.Submatrix(matrix, 0, minBin + 1, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 5; //to remove low freq content which dominates the spectrum
            var results = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);
            double[] intensity = results.Item1; //an array of intensity scores
            double[] periodicity = results.Item2; //an array of periodicity scores

            //transfer periodicity info to a hits matrix.
            //intensity = DataTools.filterMovingAverage(intensity, 3);
            double[] scoreArray = new double[intensity.Length];
            var hits = new double[rowCount, colCount];
            for (int r = 0; r < rowCount; r++)
            {
                double relativePeriod = periodicity[r] / numberOfBins / 2;
                if (intensity[r] > intensityThreshold)
                {
                    for (int c = minBin; c < maxbin; c++)
                    {
                        hits[r, c] = relativePeriod;
                    }
                }

                double herzPeriod = periodicity[r] * binWidth;
                if (herzPeriod > minFormantgap && herzPeriod < maxFormantgap)
                {
                    scoreArray[r] = 2 * intensity[r] * intensity[r]; //enhance high score wrt low score.
                }
            }

            scoreArray = DataTools.filterMovingAverage(scoreArray, 11);

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxDuration = 100000.0; //arbitrarily long number - do not want to restrict duration of machine noise
            List<AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                framesPerSecond,
                binWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);
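            //the hits matrix is set to null here, so Item2 of the returned tuple will be null.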
            hits = null;

            //set up the sonogram to return. Use the existing amplitude sonogram
            int bitsPerSample = recording.WavReader.BitsPerSample;
            TimeSpan duration = recording.Duration;
            NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD");

            var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram(
                recording.BaseName,
                windowSize,
                windowOverlap,
                bitsPerSample,
                windowPower,
                sr,
                duration,
                nrt,
                matrix);

            sonogram.DecibelsNormalised = new double[rowCount];

            //foreach frame or time step
            for (int i = 0; i < rowCount; i++)
            {
                sonogram.DecibelsNormalised[i] = 2 * Math.Log10(avAbsolute[i]);
            }

            sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised);
            return Tuple.Create(sonogram, hits, scoreArray, predictedEvents);
        } //end DetectHarmonics()
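
        // Minimal usage sketch (not part of the original source): shows how a caller might invoke
        // DetectHarmonics() above and unpack the returned tuple. All parameter values are illustrative
        // assumptions only, not values taken from any configuration file.
        public static void DetectHarmonicsUsageSketch(AudioRecording recording, TimeSpan segmentStartOffset)
        {
            var results = DetectHarmonics(
                recording,
                intensityThreshold: 0.15, // assumed score threshold in [0, 1]
                minHz: 500,               // assumed bottom of the search band
                minFormantgap: 100,       // assumed minimum formant gap in Hz
                maxFormantgap: 300,       // assumed maximum formant gap in Hz
                minDuration: 0.1,         // assumed minimum event duration in seconds
                windowSize: 1024,
                windowOverlap: 0.0,
                segmentStartOffset: segmentStartOffset);

            BaseSonogram sonogram = results.Item1;      // amplitude sonogram set up for display
            double[,] hits = results.Item2;             // hits matrix (returned as null by the method above)
            double[] scores = results.Item3;            // smoothed per-frame harmonic score
            List<AcousticEvent> events = results.Item4; // events exceeding the intensity threshold
        }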
Code example #2
        public void TestKmeansClustering()
        {
            var outputDir       = this.outputDirectory;
            var recordingsPath  = PathHelper.ResolveAssetPath("FeatureLearning");
            var folderPath      = Path.Combine(recordingsPath, "random_audio_segments");
            var outputImagePath = Path.Combine(outputDir.FullName, "ReconstructedSpectrogram.png");

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(folderPath, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty. Test will fail!");
            }

            // get the nyquist value from the first wav file in the folder of recordings
            int nq = new AudioRecording(Directory.GetFiles(folderPath, "*.wav")[0]).Nyquist;

            int           nyquist       = nq;
            int           frameSize     = 1024;
            int           finalBinCount = 128;
            int           hertzInterval = 1000;
            FreqScaleType scaleType     = FreqScaleType.Mel;
            var           freqScale     = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);

            var sonoConfig = new SonogramConfig
            {
                WindowSize = frameSize,

                //WindowOverlap is set so that 24 frames span 1 second of audio
                WindowOverlap      = 0.1028,
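                //assuming a 22050 Hz sample rate: frame step = 1024 * (1 - 0.1028) ~ 919 samples ~ 41.7 ms, i.e. ~24 frames/second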
                DoMelScale         = scaleType == FreqScaleType.Mel,
                MelBinCount        = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType = NoiseReductionType.None,
            };

            int numberOfFreqBand      = 4;
            int patchWidth            = finalBinCount / numberOfFreqBand;
            int patchHeight           = 1;
            int numberOfRandomPatches = 20;

            // Define variable number of "randomPatch" lists based on "numberOfFreqBand"
            Dictionary <string, List <double[, ]> > randomPatchLists = new Dictionary <string, List <double[, ]> >();

            for (int i = 0; i < numberOfFreqBand; i++)
            {
                randomPatchLists.Add(string.Format("randomPatch{0}", i.ToString()), new List <double[, ]>());
            }

            List <double[, ]> randomPatches = new List <double[, ]>();

            foreach (string filePath in Directory.GetFiles(folderPath, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    sonoConfig.SourceFName = recording.BaseName;
                    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

                    // DO RMS NORMALIZATION
                    sonogram.Data = SNR.RmsNormalization(sonogram.Data);

                    // DO NOISE REDUCTION
                    sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);

                    // First: create matrices from different freq bands of the source spectrogram
                    List <double[, ]> allSubmatrices = PatchSampling.GetFreqBandMatrices(sonogram.Data, numberOfFreqBand);

                    // Second: select random patches from each freq band matrix and add them to the corresponding patch list
                    int count = 0;
                    while (count < allSubmatrices.Count)
                    {
                        randomPatchLists[string.Format("randomPatch{0}", count.ToString())].Add(PatchSampling.GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numberOfRandomPatches, PatchSampling.SamplingMethod.Random).ToMatrix());
                        count++;
                    }
                }
            }

            // convert each list of random patch matrices to one matrix per frequency band
            foreach (string key in randomPatchLists.Keys)
            {
                randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
            }

            // do k-means clustering on the pooled patches of each frequency band
            int numberOfClusters = 32;
            List <KMeansClusterCollection> allClusteringOutput = new List <KMeansClusterCollection>();

            for (int i = 0; i < randomPatches.Count; i++)
            {
                double[,] patchMatrix = randomPatches[i];

                // Do k-means clustering
                string pathToClusterCsvFile = Path.Combine(outputDir.FullName, "ClusterCentroids" + i.ToString() + ".csv");
                var    clusteringOutput     = KmeansClustering.Clustering(patchMatrix, numberOfClusters);

                // sort clusters based on size (the cluster IDs and sizes are written to a csv file below)
                Dictionary <int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
                int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);

                // Write cluster ID and size to a CSV file
                string pathToClusterSizeCsvFile = Path.Combine(outputDir.FullName, "ClusterSize" + i.ToString() + ".csv");
                Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

                // Draw cluster image directly from clustering output
                List <KeyValuePair <int, double[]> > listCluster = clusteringOutput.ClusterIdCentroid.ToList();
                double[][] centroids = new double[listCluster.Count][];

                for (int j = 0; j < listCluster.Count; j++)
                {
                    centroids[j] = listCluster[j].Value;
                }

                allClusteringOutput.Add(clusteringOutput.Clusters);

                List <double[, ]> allCentroids = new List <double[, ]>();
                for (int k = 0; k < centroids.Length; k++)
                {
                    // convert each centroid to a matrix, taking centroids in order of cluster size (via sortOrder)
                    double[,] centroid = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);

                    // normalize each centroid
                    double[,] normalizedCentroid = DataTools.normalise(centroid);

                    // add a row of zeros to each centroid
                    double[,] newCentroid = PatchSampling.AddRow(normalizedCentroid);

                    allCentroids.Add(newCentroid);
                }

                // concatenate all centroids
                double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);

                // Draw clusters
                var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
                clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);

                var outputClusteringImage = Path.Combine(outputDir.FullName, "ClustersWithGrid" + i.ToString() + ".bmp");
                FrequencyScale.DrawFrequencyLinesOnImage((Image <Rgb24>)clusterImage, freqScale, includeLabels: false);
                clusterImage.Save(outputClusteringImage);
            }

            //+++++++++++++++++++++++++++++++++++++++++++Reconstructing a target spectrogram from sequential patches and the cluster centroids
            var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
            var recording2     = new AudioRecording(recording2Path);
            var sonogram2      = new SpectrogramStandard(sonoConfig, recording2.WavReader);
            var targetSpec     = sonogram2.Data;

            // Do RMS normalization
            sonogram2.Data = SNR.RmsNormalization(sonogram2.Data);

            // NOISE REDUCTION
            sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data);

            // extracting sequential patches from the target spectrogram
            List <double[, ]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(sonogram2.Data, numberOfFreqBand);

            double[][,] matrices2 = allSubmatrices2.ToArray();
            List <double[, ]> allSequentialPatchMatrix = new List <double[, ]>();

            for (int i = 0; i < matrices2.GetLength(0); i++)
            {
                int rows              = matrices2[i].GetLength(0);
                int columns           = matrices2[i].GetLength(1);
                var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
                allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
            }

            List <double[, ]> convertedSpectrogram = new List <double[, ]>();
            int columnPerFreqBand = sonogram2.Data.GetLength(1) / numberOfFreqBand;

            for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
            {
                double[,] reconstructedSpec2 = KmeansClustering.ReconstructSpectrogram(allSequentialPatchMatrix.ToArray()[i], allClusteringOutput.ToArray()[i]);
                convertedSpectrogram.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, columnPerFreqBand));
            }

            sonogram2.Data = PatchSampling.ConcatFreqBandMatrices(convertedSpectrogram);

            // DO DRAW SPECTROGRAM
            var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations);

            reconstructedSpecImage.Save(outputImagePath);

            // DO UNIT TESTING
            Assert.AreEqual(targetSpec.GetLength(0), sonogram2.Data.GetLength(0));
            Assert.AreEqual(targetSpec.GetLength(1), sonogram2.Data.GetLength(1));
        }
Code example #3
        } // FELTWithBinaryTemplate()

        /// <summary>
        /// Scans a recording given a dictionary of parameters and a syntactic template.
        /// The template has a different orientation to the others.
        /// </summary>
        /// <param name="sonogram">the spectrogram to be scanned.</param>
        /// <param name="dict">dictionary of parameter values read from the template's params file.</param>
        /// <param name="templateMatrix">the syntactic (SPR) template as a character matrix.</param>
        /// <param name="segmentStartOffset">start offset of this audio segment within the source recording.</param>
        /// <returns>the sonogram, the matching acoustic events and the score array.</returns>
        public static Tuple <SpectrogramStandard, List <AcousticEvent>, double[]> FELTWithSprTemplate(SpectrogramStandard sonogram, Dictionary <string, string> dict, char[,] templateMatrix, TimeSpan segmentStartOffset)
        {
            //i: get parameters from dictionary
            string callName       = dict[FeltTemplate_Create.key_CALL_NAME];
            bool   doSegmentation = bool.Parse(dict[FeltTemplate_Create.key_DO_SEGMENTATION]);
            double smoothWindow   = double.Parse(dict[FeltTemplate_Create.key_SMOOTH_WINDOW]);       //before segmentation
            int    minHz          = int.Parse(dict[FeltTemplate_Create.key_MIN_HZ]);
            int    maxHz          = int.Parse(dict[FeltTemplate_Create.key_MAX_HZ]);
            double minDuration    = double.Parse(dict[FeltTemplate_Create.key_MIN_DURATION]);        //min duration of event in seconds
            double dBThreshold    = double.Parse(dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD]);   // = 9.0; // dB threshold
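            // NOTE: the decibel threshold parsed above is overridden by the hard-coded value below.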

            dBThreshold = 4.0;
            int binCount = (int)(maxHz / sonogram.FBinWidth) - (int)(minHz / sonogram.FBinWidth) + 1;
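            // e.g. (assumed values) minHz = 500, maxHz = 1600 and FBinWidth ~ 17.2 Hz give binCount = 93 - 29 + 1 = 65.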

            Log.WriteLine("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);

            //ii: TEMPLATE INFO
            double templateDuration = templateMatrix.GetLength(0) / sonogram.FramesPerSecond;

            Log.WriteIfVerbose("Template duration = {0:f3} seconds or {1} frames.", templateDuration, templateMatrix.GetLength(0));
            Log.WriteIfVerbose("Min Duration: " + minDuration + " seconds");

            //iii: DO SEGMENTATION
            double segmentationThreshold = 2.0;             // Standard deviations above background noise
            double maxDuration           = double.MaxValue; // Do not constrain maximum length of events.
            var    tuple1        = AcousticEvent.GetSegmentationEvents((SpectrogramStandard)sonogram, doSegmentation, segmentStartOffset, minHz, maxHz, smoothWindow, segmentationThreshold, minDuration, maxDuration);
            var    segmentEvents = tuple1.Item1;

            //iv: Score sonogram for events matching template
            //#############################################################################################################################################
            var tuple2 = FindMatchingEvents.Execute_Spr_Match(templateMatrix, sonogram, segmentEvents, minHz, maxHz, dBThreshold);
            //var tuple2 = FindMatchingEvents.Execute_StewartGage(target, dynamicRange, (SpectralSonogram)sonogram, segmentEvents, minHz, maxHz, minDuration);
            //var tuple2 = FindMatchingEvents.Execute_SobelEdges(target, dynamicRange, (SpectralSonogram)sonogram, segmentEvents, minHz, maxHz, minDuration);
            //var tuple2 = FindMatchingEvents.Execute_MFCC_XCOR(target, dynamicRange, sonogram, segmentEvents, minHz, maxHz, minDuration);
            var scores = tuple2.Item1;

            //#############################################################################################################################################

            //v: PROCESS SCORE ARRAY
            //scores = DataTools.filterMovingAverage(scores, 3);
            LoggedConsole.WriteLine("Scores: min={0:f4}, max={1:f4}, threshold={2:f2}dB", scores.Min(), scores.Max(), dBThreshold);
            //Set (scores < 0.0) = 0.0;
            for (int i = 0; i < scores.Length; i++)
            {
                if (scores[i] < 0.0)
                {
                    scores[i] = 0.0;
                }
            }

            //vi: EXTRACT EVENTS
            List <AcousticEvent> matchEvents = AcousticEvent.ConvertScoreArray2Events(scores, minHz, maxHz, sonogram.FramesPerSecond, sonogram.FBinWidth, dBThreshold,
                                                                                      minDuration, maxDuration,
                                                                                      segmentStartOffset);

            foreach (AcousticEvent ev in matchEvents)
            {
                ev.FileName = sonogram.Configuration.SourceFName;
                ev.Name     = sonogram.Configuration.CallName;
            }

            // Edit the events to correct the start time, duration and end of events to match the max score and length of the template.
            AdjustEventLocation(matchEvents, callName, templateDuration, sonogram.Duration.TotalSeconds);

            return(Tuple.Create(sonogram, matchEvents, scores));
        } // FELTWithSprTemplate()
Code example #4
        public static void Execute(Arguments arguments)
        {
            if (arguments == null)
            {
                arguments = Dev();
            }

            string title = "# FIND OTHER ACOUSTIC EVENTS LIKE THIS";
            string date  = "# DATE AND TIME: " + DateTime.Now;

            Log.WriteLine(title);
            Log.WriteLine(date);

            Log.Verbosity = 1;
            Log.WriteIfVerbose("# Recording     =" + arguments.Source); //the recording to be scanned
            Log.WriteIfVerbose("# Template list =" + arguments.Config); //the path to a file containing the paths to template locations, one template per line
            Log.WriteIfVerbose("# Output folder =" + arguments.Output); //name of output dir

            var allEvents     = new List <AcousticEvent>();
            var scoresList    = new List <double[]>();
            var thresholdList = new List <double>();

            //i: GET RECORDING
            AudioRecording recording = new AudioRecording(arguments.Source.FullName);
            //if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
            int sr = recording.SampleRate;

            //ii: MAKE SONOGRAM
            Log.WriteLine("Start sonogram.");
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.SourceFName        = recording.BaseName;
            sonoConfig.WindowOverlap      = FeltFrameOverlap; // set default value
            sonoConfig.DoMelScale         = false;
            sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
            AmplitudeSonogram   basegram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
            SpectrogramStandard sonogram = new SpectrogramStandard(basegram);  //spectrogram has dim[N,257]

            Log.WriteLine("Signal: Duration={0}, Sample Rate={1}", sonogram.Duration, sr);
            Log.WriteLine("Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f0}%, Offset={3:f1}ms, Frames/s={4:f1}",
                          sonogram.Configuration.WindowSize, sonogram.FrameCount, (sonogram.FrameDuration * 1000),
                          (sonogram.FrameStep * 1000), sonogram.FramesPerSecond, FeltFrameOverlap * 100);

            //iii: Get zip paths and the results Tuple
            List <string> zipList = FileTools.ReadTextFile(arguments.Config.FullName);
            Tuple <SpectrogramStandard, List <AcousticEvent>, double[]> results = null; //set up the results Tuple

            foreach (string zipPath in zipList)
            {
                if (zipPath.StartsWith("#"))
                {
                    continue;                           // commented line
                }
                if (zipPath.Length < 2)
                {
                    continue;                           // empty line
                }
                //i: get params file
                // assumption: ZipFile is System.IO.Compression.ZipFile, whose ExtractToDirectory expects (archivePath, destinationDirectory)
                ZipFile.ExtractToDirectory(zipPath, arguments.Output.FullName);
                string   zipName    = Path.GetFileNameWithoutExtension(zipPath);
                string[] parts      = zipName.Split('_');
                string   paramsPath = Path.Combine(arguments.Output.FullName, parts[0] + "_" + parts[1] + "_Params.txt");

                string id = parts[0] + "_" + parts[1];
                Log.WriteIfVerbose("################################################### " + id + " ########################################################");

                //ii: READ PARAMETER VALUES FROM INI FILE
                var config = new ConfigDictionary(paramsPath);
                Dictionary <string, string> dict = config.GetTable();
                //Dictionary<string, string>.KeyCollection keys = dict.Keys;
                //int DRAW_SONOGRAMS = Int32.Parse(dict[FeltTemplate_Create.key_DRAW_SONOGRAMS]);          //options to draw sonogram
                dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD] = "4.0";
                dict[FeltTemplate_Create.key_MIN_DURATION]      = "0.02";

                if (zipName.EndsWith("binaryTemplate"))
                {
                    string templatePath = Path.Combine(arguments.Output.FullName, id + "_binary.bmp");
                    double[,] templateMatrix = FindMatchingEvents.ReadImage2BinaryMatrixDouble(templatePath);
                    results = FELTWithBinaryTemplate(sonogram, dict, templateMatrix, TimeSpan.Zero);
                }
                else
                if (zipName.EndsWith("trinaryTemplate"))
                {
                    string templatePath = Path.Combine(arguments.Output.FullName, id + "_trinary.bmp");
                    double[,] templateMatrix = FindMatchingEvents.ReadImage2TrinaryMatrix(templatePath);
                    results = FELTWithBinaryTemplate(sonogram, dict, templateMatrix, TimeSpan.Zero);
                }
                else
                if (zipName.EndsWith("syntacticTemplate"))
                {
                    string templatePath = Path.Combine(arguments.Output.FullName, id + "_spr.txt");
                    char[,] templateMatrix = FindMatchingEvents.ReadTextFile2CharMatrix(templatePath);
                    results = FELTWithSprTemplate(sonogram, dict, templateMatrix, TimeSpan.Zero);
                }
                else
                {
                    Log.WriteLine("ERROR! UNKNOWN TEMPLATE: Zip file has unrecognised suffix:" + zipName);
                    continue;
                }

                //get results
                sonogram = results.Item1;
                var matchingEvents = results.Item2;
                var scores         = results.Item3;

                double matchThreshold = double.Parse(dict[FeltTemplate_Create.key_DECIBEL_THRESHOLD]);
                Log.WriteLine("# Finished detecting events like target: " + id);
                Log.WriteLine("# Matching Event Count = " + matchingEvents.Count);
                Log.WriteLine("           @ threshold = {0:f2}", matchThreshold);

                // accumulate results
                allEvents.AddRange(matchingEvents);
                scoresList.Add(scores);
                thresholdList.Add(matchThreshold);

                //v: write events count to results info file.
                double        sigDuration = sonogram.Duration.TotalSeconds;
                string        fname       = arguments.Source.Name;
                string        str         = string.Format("{0}\t{1}\t{2}", fname, sigDuration, matchingEvents.Count);
                StringBuilder sb          = AcousticEvent.WriteEvents(matchingEvents, str);
                FileTools.WriteTextFile("opPath", sb.ToString());
            } // foreach (string zipPath in zipList)

            Log.WriteLine("\n\n\n##########################################################");
            Log.WriteLine("# Finished detecting events");
            Log.WriteLine("# Event Count = " + allEvents.Count);
            foreach (AcousticEvent ae in allEvents)
            {
                Log.WriteLine("# Event name = {0}  ############################", ae.Name);
                Log.WriteLine("# Event time = {0:f2} to {1:f2} (frames {2}-{3}).", ae.TimeStart, ae.TimeEnd, ae.Oblong.RowTop, ae.Oblong.RowBottom);
                Log.WriteLine("# Event score= {0:f2}.", ae.Score);
            }

            int percentOverlap = 50;

            allEvents = PruneOverlappingEvents(allEvents, percentOverlap);
            Log.WriteLine("\n##########################################################");
            Log.WriteLine("# Finished pruning events");
            Log.WriteLine("# Event Count = " + allEvents.Count);
            WriteEventNames(allEvents);

            //WriteScoreAverages2Console(scoresList);

            //draw images of sonograms
            int      DRAW_SONOGRAMS = 2;
            FileInfo opImagePath    = arguments.Output.CombineFile(Path.GetFileNameWithoutExtension(arguments.Source.Name) + "_matchingEvents.png");

            if (DRAW_SONOGRAMS == 2)
            {
                DrawSonogram(sonogram, opImagePath, allEvents, thresholdList, scoresList);
            }
            else
            if ((DRAW_SONOGRAMS == 1) && (allEvents.Count > 0))
            {
                DrawSonogram(sonogram, opImagePath, allEvents, thresholdList, scoresList);
            }

            Log.WriteLine("# FINISHED passing all templates over recording:- " + arguments.Source.Name);
        }
Code example #5
        /// <summary>
        /// Calculates the following spectrograms as per settings in the Images array in the config file: Towsey.SpectrogramGenerator.yml:
        /// Waveform.
        /// DecibelSpectrogram.
        /// DecibelSpectrogramNoiseReduced.
        /// CepstralSpectrogram.
        /// DifferenceSpectrogram.
        /// AmplitudeSpectrogramLocalContrastNormalization.
        /// Experimental.
        /// In the config file, comment out (with a hash) those spectrograms that are not required.
        /// </summary>
        /// <param name="sourceRecording">The name of the original recording.</param>
        /// <param name="config">Contains parameter info to make spectrograms.</param>
        /// <param name="sourceRecordingName">.Name of source recording. Required only spectrogram labels.</param>
        public static AudioToSonogramResult GenerateSpectrogramImages(
            FileInfo sourceRecording,
            SpectrogramGeneratorConfig config,
            string sourceRecordingName)
        {
            //int signalLength = recordingSegment.WavReader.GetChannel(0).Length;
            var recordingSegment = new AudioRecording(sourceRecording.FullName);
            int sampleRate       = recordingSegment.WavReader.SampleRate;
            var result           = new AudioToSonogramResult();

            var requestedImageTypes = config.Images ?? new[] { SpectrogramImageType.DecibelSpectrogram };
            var @do = requestedImageTypes.ToHashSet();

            int frameSize = config.GetIntOrNull("FrameLength") ?? 512;
            int frameStep = config.GetIntOrNull("FrameStep") ?? 441;

            // must calculate this because it is used later on.
            double frameOverlap = (frameSize - frameStep) / (double)frameSize;
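            // e.g. with the defaults frameSize = 512 and frameStep = 441: frameOverlap = (512 - 441) / 512 ~ 0.139,
            // and at an assumed sample rate of 22050 Hz a 441-sample step is exactly 20 ms (50 frames/second).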

            // Default noiseReductionType = Standard
            var bgNoiseThreshold = config.BgNoiseThreshold;

            // threshold for drawing the difference spectrogram
            var differenceThreshold = config.DifferenceThreshold;

            // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
            var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep);

            var sonoConfig = new SonogramConfig()
            {
                epsilon                 = recordingSegment.Epsilon,
                SampleRate              = sampleRate,
                WindowSize              = frameSize,
                WindowStep              = frameStep,
                WindowOverlap           = frameOverlap,
                WindowPower             = dspOutput1.WindowPower,
                Duration                = recordingSegment.Duration,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = bgNoiseThreshold,
            };

            var images = new Dictionary <SpectrogramImageType, Image <Rgb24> >(requestedImageTypes.Length);

            // IMAGE 1) draw the WAVEFORM
            if (@do.Contains(Waveform))
            {
                var minValues     = dspOutput1.MinFrameValues;
                var maxValues     = dspOutput1.MaxFrameValues;
                int height        = config.WaveformHeight;
                var waveformImage = GetWaveformImage(minValues, maxValues, height);

                // add in the title bar and time scales.
                string title =
                    $"WAVEFORM - {sourceRecordingName} (min value={dspOutput1.MinSignalValue:f3}, max value={dspOutput1.MaxSignalValue:f3})";
                var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
                    title,
                    waveformImage.Width,
                    ImageTags[Waveform]);
                var      startTime          = TimeSpan.Zero;
                var      xAxisTicInterval   = TimeSpan.FromSeconds(1);
                TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(frameStep / (double)sampleRate);
                var      labelInterval      = TimeSpan.FromSeconds(5);
                waveformImage = BaseSonogram.FrameSonogram(
                    waveformImage,
                    titleBar,
                    startTime,
                    xAxisTicInterval,
                    xAxisPixelDuration,
                    labelInterval);
                images.Add(Waveform, waveformImage);
            }

            // Draw various decibel spectrograms
            var decibelTypes = new[] { SpectrogramImageType.DecibelSpectrogram, DecibelSpectrogramNoiseReduced, DifferenceSpectrogram, Experimental };

            if (@do.Overlaps(decibelTypes))
            {
                // disable noise removal for first two spectrograms
                var disabledNoiseReductionType = sonoConfig.NoiseReductionType;
                sonoConfig.NoiseReductionType = NoiseReductionType.None;

                //Get the decibel spectrogram
                var decibelSpectrogram = new SpectrogramStandard(sonoConfig, dspOutput1.AmplitudeSpectrogram);
                result.DecibelSpectrogram   = decibelSpectrogram;
                double[,] dbSpectrogramData = (double[, ])decibelSpectrogram.Data.Clone();

                // IMAGE 2) Display the DecibelSpectrogram
                if (@do.Contains(SpectrogramImageType.DecibelSpectrogram))
                {
                    images.Add(
                        SpectrogramImageType.DecibelSpectrogram,
                        decibelSpectrogram.GetImageFullyAnnotated(
                            $"DECIBEL SPECTROGRAM ({sourceRecordingName})",
                            ImageTags[SpectrogramImageType.DecibelSpectrogram]));
                }

                if (@do.Overlaps(new[] { DecibelSpectrogramNoiseReduced, Experimental, CepstralSpectrogram }))
                {
                    sonoConfig.NoiseReductionType      = disabledNoiseReductionType;
                    sonoConfig.NoiseReductionParameter = bgNoiseThreshold;
                    double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram.Data);
                    decibelSpectrogram.Data =
                        SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram.Data, spectralDecibelBgn);
                    decibelSpectrogram.Data =
                        SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram.Data, nhThreshold: bgNoiseThreshold);

                    // IMAGE 3) DecibelSpectrogram - noise reduced
                    if (@do.Contains(DecibelSpectrogramNoiseReduced))
                    {
                        images.Add(
                            DecibelSpectrogramNoiseReduced,
                            decibelSpectrogram.GetImageFullyAnnotated(
                                $"DECIBEL SPECTROGRAM + Lamel noise subtraction. ({sourceRecordingName})",
                                ImageTags[DecibelSpectrogramNoiseReduced]));
                    }

                    // IMAGE 4) EXPERIMENTAL Spectrogram
                    if (@do.Contains(Experimental))
                    {
                        sonoConfig.NoiseReductionType = disabledNoiseReductionType;
                        images.Add(
                            Experimental,
                            GetDecibelSpectrogram_Ridges(
                                dbSpectrogramData,
                                decibelSpectrogram,
                                sourceRecordingName));
                    }
                }

                // IMAGE 5) draw difference spectrogram. This is derived from the original decibel spectrogram
                if (@do.Contains(DifferenceSpectrogram))
                {
                    //var differenceThreshold = configInfo.GetDoubleOrNull("DifferenceThreshold") ?? 3.0;
                    var differenceImage = GetDifferenceSpectrogram(dbSpectrogramData, differenceThreshold);
                    differenceImage = BaseSonogram.GetImageAnnotatedWithLinearHertzScale(
                        differenceImage,
                        sampleRate,
                        frameStep,
                        $"DECIBEL DIFFERENCE SPECTROGRAM ({sourceRecordingName})",
                        ImageTags[DifferenceSpectrogram]);
                    images.Add(DifferenceSpectrogram, differenceImage);
                }
            }

            // IMAGE 6) Cepstral Spectrogram
            if (@do.Contains(CepstralSpectrogram))
            {
                images.Add(
                    CepstralSpectrogram,
                    GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
            }

            // IMAGE 7) AmplitudeSpectrogram_LocalContrastNormalization
            if (@do.Contains(AmplitudeSpectrogramLocalContrastNormalization))
            {
                var neighborhoodSeconds  = config.NeighborhoodSeconds;
                var lcnContrastParameter = config.LcnContrastLevel;
                images.Add(
                    AmplitudeSpectrogramLocalContrastNormalization,
                    GetLcnSpectrogram(
                        sonoConfig,
                        recordingSegment,
                        sourceRecordingName,
                        neighborhoodSeconds,
                        lcnContrastParameter));
            }

            // now pick and combine images in the order the user specified
            var sortedImages = requestedImageTypes.Select(x => images[x]);

            // COMBINE THE SPECTROGRAM IMAGES
            result.CompositeImage = ImageTools.CombineImagesVertically(sortedImages.ToArray());
            return(result);
        }
Code example #6
        /// <summary>
        /// Detects oscillations in a given freq bin.
        /// There are several important parameters for tuning.
        /// a) DCTLength: Good values are 0.25 to 0.50 sec. Do not want too long because the DCT requires stationarity.
        ///     Do not want too short because then the DCT spans too small a range of oscillations.
        /// b) DCTindex: Sets the lower bound for oscillations of interest. The index refers to the array of coefficients returned by the DCT.
        ///     The array has the same length as the DCT. Low freq oscillations occur more often by chance, so we want to exclude them.
        /// c) MinAmplitude: minimum acceptable value of a DCT coefficient if a hit is to be accepted.
        ///     The algorithm is sensitive to this value. A lower value results in more oscillation hits being returned.
        /// </summary>
        /// <param name="sonogram">A spectrogram.</param>
        /// <param name="minHz">minimum frequency (Hz) of the search band.</param>
        /// <param name="maxHz">maximum frequency (Hz) of the search band.</param>
        /// <param name="dctDuration">duration of the DCT in seconds.</param>
        /// <param name="minOscilFreq">minimum oscillation freq.</param>
        /// <param name="maxOscilFreq">maximum oscillation freq.</param>
        /// <param name="dctThreshold">threshold - do not accept a DCT coefficient if its value is less than this threshold.</param>
        public static double[,] DetectOscillations(SpectrogramStandard sonogram, int minHz, int maxHz, double dctDuration, int minOscilFreq, int maxOscilFreq, double dctThreshold)
        {
            int minBin = (int)(minHz / sonogram.FBinWidth);
            int maxBin = (int)(maxHz / sonogram.FBinWidth);

            int dctLength = (int)Math.Round(sonogram.FramesPerSecond * dctDuration);
            int minIndex  = (int)(minOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
            int maxIndex  = (int)(maxOscilFreq * dctDuration * 2); //multiply by 2 because index = Pi and not 2Pi
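            // worked example (assumed values): dctDuration = 0.5 s and framesPerSecond ~ 86 give dctLength ~ 43;
            // minOscilFreq = 4 Hz maps to minIndex = 4 and maxOscilFreq = 8 Hz maps to maxIndex = 8.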

            int midOscilFreq = minOscilFreq + ((maxOscilFreq - minOscilFreq) / 2);

            //safety check
            if (maxIndex > dctLength)
            {
                return(null);
            }

            int rows = sonogram.Data.GetLength(0);
            int cols = sonogram.Data.GetLength(1);

            double[,] hits   = new double[rows, cols];
            double[,] matrix = sonogram.Data;

            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients

            //following two lines write matrix of cos values for checking.
            //string txtPath = @"C:\SensorNetworks\Output\cosines.txt";
            //FileTools.WriteMatrix2File_Formatted(cosines, txtPath, "F3");

            //following two lines write bmp image of cos values for checking.
            //string bmpPath = @"C:\SensorNetworks\Output\cosines.png";
            //ImageTools.DrawMatrix(cosines, bmpPath, true);

            //traverse columns - skip DC column
            for (int c = minBin; c <= maxBin; c++)
            {
                var dctArray = new double[dctLength];

                for (int r = 0; r < rows - dctLength; r++)
                {
                    // extract array and ready for DCT
                    for (int i = 0; i < dctLength; i++)
                    {
                        dctArray[i] = matrix[r + i, c];
                    }

                    int lowerDctBound   = minIndex / 4;
                    var dctCoeff        = DoDct(dctArray, cosines, lowerDctBound);
                    int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

                    //mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
                    if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold)
                    {
                        for (int i = 0; i < dctLength; i++)
                        {
                            hits[r + i, c] = midOscilFreq;
                        }
                    }

                    r += 5; //skip rows
                }

                c++; //do alternate columns
            }

            return(hits);
        }
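
        // Minimal usage sketch (not part of the original source): shows how the hits matrix returned by
        // DetectOscillations() above might be collapsed into a per-frame score. The search band and
        // oscillation rates below are illustrative assumptions only.
        public static double[] DetectOscillationsUsageSketch(SpectrogramStandard sonogram)
        {
            // scan 500-1600 Hz for 4-8 Hz oscillations using a 0.5 second DCT window (assumed values)
            double[,] hits = DetectOscillations(sonogram, minHz: 500, maxHz: 1600, dctDuration: 0.5, minOscilFreq: 4, maxOscilFreq: 8, dctThreshold: 0.5);
            if (hits == null)
            {
                // the requested maximum oscillation rate cannot be resolved by the DCT window
                return null;
            }

            // sum the hits across frequency bins to get one oscillation score per frame
            var frameScores = new double[hits.GetLength(0)];
            for (int r = 0; r < hits.GetLength(0); r++)
            {
                for (int c = 0; c < hits.GetLength(1); c++)
                {
                    frameScores[r] += hits[r, c];
                }
            }

            return frameScores;
        }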
Code example #7
        /// <inheritdoc/>
        public override RecognizerResults Recognize(
            AudioRecording audioRecording,
            Config genericConfig,
            TimeSpan segmentStartOffset,
            Lazy <IndexCalculateResult[]> getSpectralIndexes,
            DirectoryInfo outputDirectory,
            int? imageWidth)
        {
            var configuration = (GenericRecognizerConfig)genericConfig;

            if (configuration.Profiles.NotNull() && configuration.Profiles.Count == 0)
            {
                throw new ConfigFileException(
                          "The generic recognizer needs at least one profile set. 0 were found.");
            }

            int count   = configuration.Profiles.Count;
            var message = $"Found {count} analysis profile(s): " + configuration.Profiles.Keys.Join(", ");

            Log.Info(message);

            var allResults = new RecognizerResults()
            {
                Events     = new List <AcousticEvent>(),
                Hits       = null,
                ScoreTrack = null,
                Plots      = new List <Plot>(),
                Sonogram   = null,
            };

            // Now process each of the profiles
            foreach (var (profileName, profileConfig) in configuration.Profiles)
            {
                Log.Info("Processing profile: " + profileName);

                List <AcousticEvent> acousticEvents;
                var plots = new List <Plot>();
                SpectrogramStandard sonogram;

                Log.Debug($"Using the {profileName} algorithm... ");
                if (profileConfig is CommonParameters parameters)
                {
                    if (profileConfig is BlobParameters || profileConfig is OscillationParameters || profileConfig is WhistleParameters || profileConfig is HarmonicParameters)
                    {
                        sonogram = new SpectrogramStandard(ParametersToSonogramConfig(parameters), audioRecording.WavReader);

                        if (profileConfig is OscillationParameters op)
                        {
                            Oscillations2012.Execute(
                                sonogram,
                                op.MinHertz.Value,
                                op.MaxHertz.Value,
                                op.DctDuration,
                                op.MinOscillationFrequency,
                                op.MaxOscillationFrequency,
                                op.DctThreshold,
                                op.EventThreshold,
                                op.MinDuration.Value,
                                op.MaxDuration.Value,
                                out var scores,
                                out acousticEvents,
                                out var hits,
                                segmentStartOffset);

                            //plots.Add(new Plot($"{profileName} (:OscillationScore)", scores, op.EventThreshold));
                            var plot = PreparePlot(scores, $"{profileName} (:OscillationScore)", op.EventThreshold);
                            plots.Add(plot);
                        }
                        else if (profileConfig is BlobParameters bp)
                        {
                            //get the array of intensity values minus intensity in side/buffer bands.
                            //i.e. require silence in side-bands. Otherwise might simply be getting part of a broader band acoustic event.
                            var decibelArray = SNR.CalculateFreqBandAvIntensityMinusBufferIntensity(
                                sonogram.Data,
                                bp.MinHertz.Value,
                                bp.MaxHertz.Value,
                                bp.BottomHertzBuffer.Value,
                                bp.TopHertzBuffer.Value,
                                sonogram.NyquistFrequency);

                            // prepare plot of resultant blob decibel array.
                            var plot = PreparePlot(decibelArray, $"{profileName} (Blob:db Intensity)", bp.DecibelThreshold.Value);
                            plots.Add(plot);

                            // iii: CONVERT blob decibel SCORES TO ACOUSTIC EVENTS.
                            // Note: This method does NOT do prior smoothing of the dB array.
                            acousticEvents = AcousticEvent.GetEventsAroundMaxima(
                                decibelArray,
                                segmentStartOffset,
                                bp.MinHertz.Value,
                                bp.MaxHertz.Value,
                                bp.DecibelThreshold.Value,
                                TimeSpan.FromSeconds(bp.MinDuration.Value),
                                TimeSpan.FromSeconds(bp.MaxDuration.Value),
                                sonogram.FramesPerSecond,
                                sonogram.FBinWidth);
                        }
                        else if (profileConfig is WhistleParameters wp)
                        {
                            //get the array of intensity values minus intensity in side/buffer bands.
                            double[] decibelArray;
                            (acousticEvents, decibelArray) = WhistleParameters.GetWhistles(
                                sonogram,
                                wp.MinHertz.Value,
                                wp.MaxHertz.Value,
                                sonogram.NyquistFrequency,
                                wp.DecibelThreshold.Value,
                                wp.MinDuration.Value,
                                wp.MaxDuration.Value,
                                segmentStartOffset);

                            var plot = PreparePlot(decibelArray, $"{profileName} (Whistle:dB Intensity)", wp.DecibelThreshold.Value);
                            plots.Add(plot);
                        }
                        else if (profileConfig is HarmonicParameters hp)
                        {
                            //get the array of intensity values minus intensity in side/buffer bands.
                            double[] scoreArray;
                            (acousticEvents, scoreArray) = HarmonicParameters.GetComponentsWithHarmonics(
                                sonogram,
                                hp.MinHertz.Value,
                                hp.MaxHertz.Value,
                                sonogram.NyquistFrequency,
                                hp.DecibelThreshold.Value,
                                hp.DctThreshold.Value,
                                hp.MinDuration.Value,
                                hp.MaxDuration.Value,
                                hp.MinFormantGap.Value,
                                hp.MaxFormantGap.Value,
                                segmentStartOffset);

                            var plot = PreparePlot(scoreArray, $"{profileName} (Harmonics:dB Intensity)", hp.DecibelThreshold.Value);
                            plots.Add(plot);
                        }
                        else
                        {
                            throw new InvalidOperationException();
                        }
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    //iv: add additional info to the acoustic events
                    acousticEvents.ForEach(ae =>
                    {
                        ae.FileName               = audioRecording.BaseName;
                        ae.SpeciesName            = parameters.SpeciesName;
                        ae.Name                   = parameters.ComponentName;
                        ae.Profile                = profileName;
                        ae.SegmentDurationSeconds = audioRecording.Duration.TotalSeconds;
                        ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                        ae.SetTimeAndFreqScales(sonogram.FrameStep, sonogram.FrameDuration, sonogram.FBinWidth);
                    });
                }
                else if (profileConfig is Aed.AedConfiguration ac)
                {
                    var config = new SonogramConfig
                    {
                        NoiseReductionType      = ac.NoiseReductionType,
                        NoiseReductionParameter = ac.NoiseReductionParameter,
                    };
                    sonogram = new SpectrogramStandard(config, audioRecording.WavReader);

                    acousticEvents = Aed.CallAed(sonogram, ac, segmentStartOffset, audioRecording.Duration).ToList();
                }
                else
                {
                    throw new InvalidOperationException();
                }

                // combine the results i.e. add the events list of call events.
                allResults.Events.AddRange(acousticEvents);
                allResults.Plots.AddRange(plots);

                // effectively keeps only the *last* sonogram produced
                allResults.Sonogram = sonogram;
                Log.Debug($"{profileName} event count = {acousticEvents.Count}");

                // FOR DEBUG PURPOSES, UNCOMMENT THE NEXT LINE
                //SaveDebugSpectrogram(allResults, genericConfig, outputDirectory, "name");
            }

            return(allResults);
        }
Code example #8
        /// <summary>
        /// THIS IS THE CORE DETECTION METHOD.
        /// Detects the human voice.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values
            int frameLength = 1024;

            if (configDict.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
            }

            double windowOverlap = 0.0;

            int    minHz              = int.Parse(configDict["MIN_HZ"]);
            int    minFormantgap      = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int    maxFormantgap      = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double intensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double minDuration        = double.Parse(configDict["MIN_DURATION"]);        // seconds
            double maxDuration        = double.Parse(configDict["MAX_DURATION"]);        // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            SonogramConfig sonoConfig = new SonogramConfig
            {
                //default values config
                SourceFName        = recording.BaseName,
                WindowSize         = frameLength,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            var    tsRecordingtDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            //#############################################################################################################################################
            //window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
            // 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
            // 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
            // 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz

            //the Xcorrelation-FFT technique requires the number of bins to scan to be a power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int numberOfBins = 64;
            int minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin       = minBin + numberOfBins - 1;
            int maxHz        = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            //ii: DETECT HARMONICS
            int zeroBinCount = 4; //to remove low freq content which dominates the spectrum
            var results      = CrossCorrelation.DetectBarsInTheRowsOfaMatrix(subMatrix, intensityThreshold, zeroBinCount);

            double[] intensity   = results.Item1;
            double[] periodicity = results.Item2; //an array of periodicity scores

            //intensity = DataTools.filterMovingAverage(intensity, 3);
            //human speech is expected to have its maximum power roughly between 100 and 1000 Hz.
            //zero the spectrum outside a broader 100-3000 Hz band before locating the peak frequency.
            int lowerHumanMaxBound = (int)(100 / freqBinWidth);  //ignore 0-100 Hz - too much noise
            int upperHumanMaxBound = (int)(3000 / freqBinWidth); //ignore above 3000 Hz
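            //e.g. (assumed values) sr = 17640 and frameLength = 1024 give freqBinWidth ~17.2 Hz,
            //so lowerHumanMaxBound ~5 bins (~100 Hz) and upperHumanMaxBound ~174 bins (~3000 Hz).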

            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                if (intensity[r] < intensityThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find freq having max power and use info to adjust score.
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < lowerHumanMaxBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                for (int j = upperHumanMaxBound; j < spectrum.Length; j++)
                {
                    spectrum[j] = 0.0;
                }

                double[] peakvalues = DataTools.GetPeakValues(spectrum);
                int      maxIndex1  = DataTools.GetMaxIndex(peakvalues);
                peakvalues[maxIndex1] = 0.0;
                int maxIndex2 = DataTools.GetMaxIndex(peakvalues);
                int avMaxBin  = (maxIndex1 + maxIndex2) / 2;

                //int freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
                int    freqWithMaxPower = (int)Math.Round(avMaxBin * freqBinWidth);

                //discount the score if the dominant frequency lies outside the expected 500-1000 Hz speech range
                double discount = 1.0;
                if (freqWithMaxPower < 500 || freqWithMaxPower > 1000)
                {
                    discount = 0.0;
                }

                //set scoreArray[r]  - ignore locations with low intensity
                if (intensity[r] > intensityThreshold)
                {
                    scoreArray[r] = intensity[r] * discount;
                }
            }

            //transfer info to a hits matrix.
            var    hits      = new double[rowCount, colCount];
            double threshold = intensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                double herzPeriod = periodicity[r] * freqBinWidth;
                for (int c = minBin; c < maxbin; c++)
                {
                    //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
                    hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap;  //to get a relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            List <AcousticEvent> predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                scoreArray,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                intensityThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            //remove isolated speech events - expect humans to talk like politicians
            //predictedEvents = Human2.FilterHumanSpeechEvents(predictedEvents);
            Plot plot = new Plot(AnalysisName, intensity, intensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingDuration));
        } //Analysis()
Code Example #9
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            // common properties
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);
            // duration of DCT in seconds
            //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

            // minimum acceptable value of a DCT coefficient
            //double dctThreshold = (double)configuration[AnalysisKeys.DctThreshold];
            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;
            double decibelThreshold        = configuration.GetDouble("DecibelThreshold");

            //double minPeriod = (double)configuration["MinPeriod"]; //: 0.18
            //double maxPeriod = (double)configuration["MaxPeriod"]; //

            //int maxOscilRate = (int)Math.Ceiling(1 /minPeriod);
            //int minOscilRate = (int)Math.Floor(1 /maxPeriod);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            var maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            var eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            // this default frame size and overlap work best for the White Heron of Bhutan.
            const int frameSize     = 2048;
            double    windowOverlap = 0.0;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    recordingDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth) + 1;

            /* #############################################################################################################################################
             * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
             * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
             * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
             * 2048     17640      116.1ms           8.6         8.6    7430ms           551hz          1100hz
             * 2048     22050       92.8ms          21.5        10.7666 1472ms
             */

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            // var templates = GetTemplatesForAlgorithm1(14);
            var amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            bool[] peakArray       = new bool[rowCount];
            var    amplitudeScores = new double[rowCount];
            var    hits            = new double[rowCount, colCount];

            const int maxTemplateLength  = 20;
            const int templateEndPadding = 7;
            const int templateOffset     = 14;
            const int minimumGap         = 4;
            const int maximumGap         = 100;
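
            // NB: the gap and template-length constants above are in frames. Assuming (for illustration) sr = 22050,
            // with frameSize = 2048 and zero overlap the frame rate is about 10.8 frames/sec, so minimumGap = 4 frames
            // is roughly 0.37 s and maximumGap = 100 frames is roughly 9.3 s.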

            // first find the amplitude peaks
            for (int j = 1; j < amplitudeArray.Length - 1; j++)
            {
                if (amplitudeArray[j] < decibelThreshold)
                {
                    continue;
                }

                if (amplitudeArray[j] > amplitudeArray[j - 1] && amplitudeArray[j] > amplitudeArray[j + 1])
                {
                    peakArray[j] = true;
                }
            }

            // get template for the end of the Heron call
            var endTemplate = GetEndTemplateForAlgorithm2();

            // now search for peaks that are the correct distance apart.
            for (int i = 2; i < amplitudeArray.Length - maxTemplateLength - templateEndPadding; i++)
            {
                if (!peakArray[i])
                {
                    continue;
                }

                // calculate distance to next peak
                int distanceToNextPeak = CalculateDistanceToNextPeak(peakArray, i);

                // skip gaps that are too small or too large
                if (distanceToNextPeak < minimumGap || distanceToNextPeak > maximumGap)
                {
                    continue;
                }

                // The heron call ends with a rising whip.
                // Check the end of the call using the end template.
                if (distanceToNextPeak > maxTemplateLength)
                {
                    int start = i - templateOffset;
                    if (start < 0)
                    {
                        start = 0;
                    }

                    var    endLocality = DataTools.Subarray(amplitudeArray, start, endTemplate.Length);
                    double endScore    = DataTools.CosineSimilarity(endLocality, endTemplate);
                    for (int to = -templateOffset; to < endTemplate.Length - templateOffset; to++)
                    {
                        if (i + to >= 0 && endScore > amplitudeScores[i + to])
                        {
                            amplitudeScores[i + to] = endScore;

                            // hits[i, minBin] = 10;
                        }
                    }

                    for (int k = 2; k < maxTemplateLength; k++)
                    {
                        amplitudeScores[i + k] = 0.0;
                    }

                    continue;
                }

                // Get the start template which depends on distance to next peak.
                var startTemplate = GetTemplateForAlgorithm2(distanceToNextPeak, templateEndPadding);

                // now calculate similarity of locality with the startTemplate
                var    locality = DataTools.Subarray(amplitudeArray, i - 2, startTemplate.Length); // i-2 because first two places should be zero.
                double score    = DataTools.CosineSimilarity(locality, startTemplate);
                for (int t = 0; t < startTemplate.Length; t++)
                {
                    if (score > amplitudeScores[i + t])
                    {
                        amplitudeScores[i + t] = score;
                        hits[i, minBin]        = 10;
                    }
                }
            } // loop over peak array

            var smoothedScores = DataTools.filterMovingAverageOdd(amplitudeScores, 3);

            // iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                smoothedScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in predictedEvents)
            {
                if (ae.EventDurationSeconds < minDuration)
                {
                    continue;
                }

                // add additional info
                ae.SpeciesName            = speciesName;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            // do a recognizer test.
            //CompareArrayWithBenchmark(scores, new FileInfo(recording.FilePath));
            //CompareArrayWithBenchmark(prunedEvents, new FileInfo(recording.FilePath));

            var plot = new Plot(this.DisplayName, amplitudeScores, eventThreshold);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = prunedEvents,
            });
        }
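
The helper CalculateDistanceToNextPeak called above is not included in this listing; a minimal sketch of what such a helper presumably does (scan forward through the boolean peak array and return the gap to the next peak, or the remaining length if none is found) could look like the following. The name matches the call site, but the body is an illustrative assumption, not the library's implementation.

        private static int CalculateDistanceToNextPeak(bool[] peakArray, int currentIndex)
        {
            // walk forward from the current peak until the next peak is found
            for (int j = currentIndex + 1; j < peakArray.Length; j++)
            {
                if (peakArray[j])
                {
                    return j - currentIndex;
                }
            }

            // no further peak: return the distance to the end of the array
            return peakArray.Length - currentIndex;
        }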
Code Example #10
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            // WARNING: TODO TODO TODO = this method simply duplicates the CANETOAD analyser!!!!!!!!!!!!!!!!!!!!! ###################

            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

            // duration of DCT in seconds
            double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration);

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            // The default was 512 for Canetoad.
            // Framesize = 128 seems to work for Litoria fallax.
            // frame size
            int frameSize = configuration.GetInt(AnalysisKeys.KeyFrameSize);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            double windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
                recording.SampleRate,
                frameSize,
                maxOscilFreq);
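
            // Presumably the call above computes the overlap needed for the frame rate to sample the oscillation
            // adequately: framesPerSecond = sr / (frameSize * (1 - overlap)), so reaching a target rate of roughly
            // 2 * maxOscilFreq requires an overlap of about 1 - sr / (frameSize * 2 * maxOscilFreq).
            // (This is an assumption about the helper's intent; the actual formula is defined in Oscillations2012.)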

            //windowOverlap = 0.75; // previous default

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameSize,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = NoiseReductionType.None,
            };

            // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD");
            TimeSpan recordingDuration = recording.Duration;
            int      sr           = recording.SampleRate;
            double   freqBinWidth = sr / (double)sonoConfig.WindowSize;

            /* #############################################################################################################################################
             * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
             * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
             * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
             * 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz
             */

            // int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
            // int maxbin = minBin + numberOfBins - 1;
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            // double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, (rowCount - 1), maxbin);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            //minDuration = 1.0;
            Oscillations2012.Execute(
                (SpectrogramStandard)sonogram,
                minHz,
                maxHz,
                dctDuration,
                minOscilFreq,
                maxOscilFreq,
                dctThreshold,
                eventThreshold,
                minDuration,
                maxDuration,
                out var scores,
                out var acousticEvents,
                out var hits,
                segmentStartOffset);

            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
            });

            var plot = new Plot(this.DisplayName, scores, eventThreshold);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = acousticEvents,
            });
        }
Code Example #11
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            var recognizerConfig = new LitoriaNasutaConfig();

            recognizerConfig.ReadConfigFile(configuration);

            // common properties
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no name>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            // BETTER TO SET THESE. IGNORE USER!
            // This framesize is large because the oscillation we wish to detect is due to repeated croaks
            // having an interval of about 0.6 seconds. The overlap is also required to give smooth oscillation.
            const int    frameSize     = 1024;
            const double windowOverlap = 0.5;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),

                // not using noise reduction can give a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.None
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            // Get the recording
            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = sr / (sonoConfig.WindowSize * (1 - windowOverlap));

            // Get the algorithm parameters
            int minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var decibelThreshold = 9.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            // get the freq band as set by min and max Herz
            var frogBand = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // Now look for spectral maxima. For L.caerulea, the max should lie around 1100Hz +/-150 Hz.
            // Skip over spectra where maximum is not in correct location.
            int buffer            = 200;
            var croakScoreArray   = new double[rowCount];
            var hzAtTopOfTopBand  = recognizerConfig.DominantFreq + buffer;
            var hzAtBotOfTopBand  = recognizerConfig.DominantFreq - buffer;
            var binAtTopOfTopBand = (int)Math.Round((hzAtTopOfTopBand - recognizerConfig.MinHz) / freqBinWidth);
            var binAtBotOfTopBand = (int)Math.Round((hzAtBotOfTopBand - recognizerConfig.MinHz) / freqBinWidth);

            var hzAtTopOfBotBand  = recognizerConfig.SubdominantFreq + buffer;
            var hzAtBotOfBotBand  = recognizerConfig.SubdominantFreq - buffer;
            var binAtTopOfBotBand = (int)Math.Round((hzAtTopOfBotBand - recognizerConfig.MinHz) / freqBinWidth);
            var binAtBotOfBotBand = (int)Math.Round((hzAtBotOfBotBand - recognizerConfig.MinHz) / freqBinWidth);
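
            // For example (illustrative values only): if DominantFreq = 1100 Hz, MinHz = 500 Hz and
            // freqBinWidth ≈ 21.5 Hz (sr 22050, window 1024), the top sub-band spans roughly bins
            // (1100 - 200 - 500) / 21.5 ≈ 19 to (1100 + 200 - 500) / 21.5 ≈ 37 of the extracted frog band.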

            // scan the frog band and get the decibel value of those spectra which have their maximum within the correct subband.
            for (int x = 0; x < rowCount; x++)
            {
                //extract spectrum
                var    spectrum             = MatrixTools.GetRow(frogBand, x);
                int    maxIndex1            = DataTools.GetMaxIndex(spectrum);
                double maxValueInTopSubband = spectrum[maxIndex1];
                if (maxValueInTopSubband < decibelThreshold)
                {
                    continue;
                }

                // if max value not in correct sub-band then go to next spectrum
                if (maxIndex1 > binAtTopOfTopBand || maxIndex1 < binAtBotOfTopBand)
                {
                    continue;
                }

                // minimise values in top sub-band so can find maximum in bottom sub-band
                for (int y = binAtBotOfTopBand; y < binAtTopOfTopBand; y++)
                {
                    spectrum[y] = 0.0;
                }

                int maxIndex2 = DataTools.GetMaxIndex(spectrum);

                // if the maxima are properly placed in the top and bottom sub-bands then assign maxValue to the croakScore array
                if (maxIndex2 < binAtTopOfBotBand && maxIndex2 > binAtBotOfBotBand)
                {
                    croakScoreArray[x] = maxValueInTopSubband;
                }
            }

            // Prepare a normalised plot for later display with the spectrogram
            DataTools.Normalise(croakScoreArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
            var text1      = string.Format($"Croak scores (threshold={decibelThreshold})");
            var croakPlot1 = new Plot(text1, normalisedScores, normalisedThreshold);

            // extract potential croak events from the array of croak candidates
            var croakEvents = AcousticEvent.ConvertScoreArray2Events(
                croakScoreArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                recognizerConfig.EventThreshold,
                recognizerConfig.MinCroakDuration,
                recognizerConfig.MaxCroakDuration,
                segmentStartOffset);

            // add necessary info into the candidate events
            double[,] hits = null;
            var prunedEvents = new List <AcousticEvent>();

            foreach (var ae in croakEvents)
            {
                // add additional info
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
                prunedEvents.Add(ae);
            }

            /*
             * // DO NOT LOOK FOR  A PULSE TRAIN because recording from Karlina does not have one for L.nasuta.
             *
             * // With those events that survive the above Array2Events process, we now extract a new array croak scores
             * croakScoreArray = AcousticEvent.ExtractScoreArrayFromEvents(prunedEvents, rowCount, recognizerConfig.AbbreviatedSpeciesName);
             * DataTools.Normalise(croakScoreArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
             * var text2 = string.Format($"Croak events (threshold={decibelThreshold})");
             * var croakPlot2 = new Plot(text2, normalisedScores, normalisedThreshold);
             *
             *
             * // Look for oscillations in the difference array
             * // duration of DCT in seconds
             * croakScoreArray = DataTools.filterMovingAverageOdd(croakScoreArray, 5);
             * double dctDuration = recognizerConfig.DctDuration;
             * // minimum acceptable value of a DCT coefficient
             * double dctThreshold = recognizerConfig.DctThreshold;
             * double minOscRate = 1 / recognizerConfig.MaxPeriod;
             * double maxOscRate = 1 / recognizerConfig.MinPeriod;
             * var dctScores = Oscillations2012.DetectOscillations(croakScoreArray, framesPerSecond, dctDuration, minOscRate, maxOscRate, dctThreshold);
             *
             *
             * // ######################################################################
             * // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
             * var events = AcousticEvent.ConvertScoreArray2Events(dctScores, recognizerConfig.MinHz, recognizerConfig.MaxHz, sonogram.FramesPerSecond,
             *                                                            freqBinWidth, recognizerConfig.EventThreshold,
             *                                                            recognizerConfig.MinDuration, recognizerConfig.MaxDuration);
             * prunedEvents = new List<AcousticEvent>();
             * foreach (var ae in events)
             * {
             *  // add additional info
             *  ae.SpeciesName = speciesName;
             *  ae.SegmentStartOffset = segmentStartOffset;
             *  ae.AnalysisIdealSegmentDuration = recordingDuration;
             *  ae.Name = recognizerConfig.AbbreviatedSpeciesName;
             *  prunedEvents.Add(ae);
             * }
             * var scoresPlot = new Plot(this.DisplayName, dctScores, recognizerConfig.EventThreshold);
             */

            // do a recognizer test.
            if (MainEntry.InDEBUG)
            {
                //TestTools.RecognizerScoresTest(scores, new FileInfo(recording.FilePath));
                //AcousticEvent.TestToCompareEvents(prunedEvents, new FileInfo(recording.FilePath));
            }

            var scoresPlot = new Plot(this.DisplayName, croakScoreArray, recognizerConfig.EventThreshold);

            if (true)
            {
                // display a variety of debug score arrays
                // calculate amplitude at location
                double[] amplitudeArray = MatrixTools.SumRows(frogBand);
                DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scoresPlot, /*croakPlot2,*/ croakPlot1, amplPlot
                };

                // NOTE: This DrawDebugImage() method can be over-written in this class.
                var debugImage = DrawDebugImage(sonogram, prunedEvents, debugPlots, hits);
                var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                debugImage.Save(debugPath);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = scoresPlot.AsList(),
                Events = prunedEvents,

                //Events = events
            });
        }
Code Example #12
        /// <summary>
        /// THE KEY ANALYSIS METHOD
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="sonoConfig"></param>
        /// <param name="lrConfig"></param>
        /// <param name="returnDebugImage"></param>
        /// <param name="segmentStartOffset"></param>
        /// <returns></returns>
        private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LewinsRailConfig lrConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            int sr = recording.SampleRate;

            int upperBandMinHz = lrConfig.UpperBandMinHz;
            int upperBandMaxHz = lrConfig.UpperBandMaxHz;
            int lowerBandMinHz = lrConfig.LowerBandMinHz;
            int lowerBandMaxHz = lrConfig.LowerBandMaxHz;

            //double decibelThreshold = lrConfig.DecibelThreshold;   //dB
            //int windowSize = lrConfig.WindowSize;
            double eventThreshold = lrConfig.EventThreshold; //in 0-1
            double minDuration    = lrConfig.MinDuration;    // seconds
            double maxDuration    = lrConfig.MaxDuration;    // seconds
            double minPeriod      = lrConfig.MinPeriod;      // seconds
            double maxPeriod      = lrConfig.MaxPeriod;      // seconds

            //double freqBinWidth = sr / (double)windowSize;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            //i: MAKE SONOGRAM
            // NB: frames per second equals the frequency bin width (sr / windowSize) only when WindowOverlap == 0
            double framesPerSecond = freqBinWidth;

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700

            int upperBandMinBin = (int)Math.Round(upperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(upperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            //ALTERNATIVE IS TO USE THE AMPLITUDE SPECTRUM
            //var results2 = DSP_Frames.ExtractEnvelopeAndFFTs(recording.GetWavReader().Samples, sr, frameSize, windowOverlap);
            //double[,] matrix = results2.Item3;  //amplitude spectrogram. Note that column zero is the DC or average energy value and can be ignored.
            //double[] avAbsolute = results2.Item1; //average absolute value over the minute recording
            ////double[] envelope = results2.Item2;
            //double windowPower = results2.Item4;

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            int step         = (int)Math.Round(framesPerSecond); //take one second steps
            int stepCount    = rowCount / step;
            int sampleLength = 64;                               //64 frames = 3.7 seconds. Suitable for Lewins Rail.

            double[] intensity   = new double[rowCount];
            double[] periodicity = new double[rowCount];

            //######################################################################
            //ii: DO THE ANALYSIS AND RECOVER SCORES
            for (int i = 0; i < stepCount; i++)
            {
                int      start         = step * i;
                double[] lowerSubarray = DataTools.Subarray(lowerArray, start, sampleLength);
                double[] upperSubarray = DataTools.Subarray(upperArray, start, sampleLength);
                if (lowerSubarray.Length != sampleLength || upperSubarray.Length != sampleLength)
                {
                    break;
                }

                var spectrum  = AutoAndCrossCorrelation.CrossCorr(lowerSubarray, upperSubarray);
                int zeroCount = 3;
                for (int s = 0; s < zeroCount; s++)
                {
                    spectrum[s] = 0.0;  //in real data these bins are dominant and hide other frequency content
                }

                spectrum = DataTools.NormaliseArea(spectrum);
                int    maxId  = DataTools.GetMaxIndex(spectrum);
                double period = 2 * sampleLength / (double)maxId / framesPerSecond; //convert maxID to period in seconds
                if (period < minPeriod || period > maxPeriod)
                {
                    continue;
                }

                // lay down score for sample length
                for (int j = 0; j < sampleLength; j++)
                {
                    if (intensity[start + j] < spectrum[maxId])
                    {
                        intensity[start + j] = spectrum[maxId];
                    }

                    periodicity[start + j] = period;
                }
            }
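
            // Worked example (illustrative values): with framesPerSecond ≈ 17.2 (sr 17640, window 1024, no overlap)
            // and sampleLength = 64, a cross-correlation peak at maxId = 16 gives
            // period = 2 * 64 / 16 / 17.2 ≈ 0.47 seconds.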

            //######################################################################

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            intensity = DataTools.filterMovingAverage(intensity, 5);

            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                intensity,
                lowerBandMinHz,
                upperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            CropEvents(predictedEvents, upperArray, segmentStartOffset);
            var hits = new double[rowCount, colCount];

            //######################################################################

            var   scorePlot  = new Plot("L.pect", intensity, lrConfig.IntensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;
                DataTools.Normalise(intensity, lrConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var intensityPlot = new Plot("Intensity", normalisedScores, normalisedThreshold);
                DataTools.Normalise(periodicity, 10, out normalisedScores, out normalisedThreshold);
                var periodicityPlot = new Plot("Periodicity", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, intensityPlot, periodicityPlot
                };
                debugImage = DrawDebugImage(sonogram, predictedEvents, debugPlots, hits);
            }

            return(Tuple.Create(sonogram, hits, intensity, predictedEvents, debugImage));
        } //Analysis()
Code Example #13
        public static AudioToSonogramResult GenerateSpectrogramImages(FileInfo sourceRecording, Dictionary <string, string> configDict, DirectoryInfo opDir)
        {
            string sourceName = configDict[ConfigKeys.Recording.Key_RecordingFileName];

            sourceName = Path.GetFileNameWithoutExtension(sourceName);

            var result = new AudioToSonogramResult();

            // init the image stack
            var list = new List <Image <Rgb24> >();

            // 1) draw amplitude spectrogram
            AudioRecording recordingSegment = new AudioRecording(sourceRecording.FullName);
            // default values config; disable noise removal for the first two spectrograms
            var sonoConfig = new SonogramConfig(configDict)
            {
                NoiseReductionType = NoiseReductionType.None,
            };

            BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader);

            // remove the DC bin
            sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.FrameCount - 1, sonogram.Configuration.FreqBinCount);

            //save spectrogram data at this point - prior to noise reduction
            double[,] spectrogramDataBeforeNoiseReduction = sonogram.Data;

            int    lowPercentile        = 20;
            double neighbourhoodSeconds = 0.25;
            int    neighbourhoodFrames  = (int)(sonogram.FramesPerSecond * neighbourhoodSeconds);
            double lcnContrastLevel     = 0.25;

            //LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", sonogram.FramesPerSecond);
            //LoggedConsole.WriteLine("LCN: Neighbourhood of {0} seconds = {1} frames", neighbourhoodSeconds, neighbourhoodFrames);
            sonogram.Data = NoiseRemoval_Briggs.NoiseReduction_ShortRecordings_SubtractAndLCN(sonogram.Data, lowPercentile, neighbourhoodFrames, lcnContrastLevel);

            // draw amplitude spectrogram unannotated
            FileInfo outputImage1 = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".amplitd.png"));

            ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), outputImage1.FullName);

            // draw amplitude spectrogram annotated
            var image = sonogram.GetImageFullyAnnotated("AMPLITUDE SPECTROGRAM + Bin LCN (Local Contrast Normalisation)");

            list.Add(image);

            //string path2 = @"C:\SensorNetworks\Output\Sonograms\dataInput2.png";
            //Histogram.DrawDistributionsAndSaveImage(sonogram.Data, path2);

            // 2) A FALSE-COLOUR VERSION OF AMPLITUDE SPECTROGRAM
            double ridgeThreshold = 0.20;

            double[,] matrix = ImageTools.WienerFilter(sonogram.Data, 3);
            byte[,] hits     = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);
            hits             = RidgeDetection.JoinDisconnectedRidgesInMatrix(hits, matrix, ridgeThreshold);
            image            = SpectrogramTools.CreateFalseColourAmplitudeSpectrogram(spectrogramDataBeforeNoiseReduction, null, hits);
            image            = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "AMPLITUDE SPECTROGRAM + LCN + ridge detection");
            list.Add(image);

            var envelopeImage = ImageTrack.DrawWaveEnvelopeTrack(recordingSegment, image.Width);

            list.Add(envelopeImage);

            // 3) now draw the standard decibel spectrogram
            sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

            // draw decibel spectrogram unannotated
            FileInfo outputImage2 = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".deciBel.png"));

            ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), outputImage2.FullName);

            image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM");
            list.Add(image);

            var segmentationImage = ImageTrack.DrawSegmentationTrack(
                sonogram,
                EndpointDetectionConfiguration.K1Threshold,
                EndpointDetectionConfiguration.K2Threshold,
                image.Width);

            list.Add(segmentationImage);

            // keep the sonogram data (NOT noise reduced) for later use
            double[,] dbSpectrogramData = (double[, ])sonogram.Data.Clone();

            // 4) now draw the noise reduced decibel spectrogram
            sonoConfig.NoiseReductionType      = NoiseReductionType.Standard;
            sonoConfig.NoiseReductionParameter = 3;

            //sonoConfig.NoiseReductionType = NoiseReductionType.SHORT_RECORDING;
            //sonoConfig.NoiseReductionParameter = 50;

            sonogram = new SpectrogramStandard(sonoConfig, recordingSegment.WavReader);

            // draw decibel spectrogram unannotated
            FileInfo outputImage3 = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".noNoise_dB.png"));

            ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(sonogram.Data), outputImage3.FullName);
            image = sonogram.GetImageFullyAnnotated("DECIBEL SPECTROGRAM + Lamel noise subtraction");
            list.Add(image);

            // keep the sonogram data for later use
            double[,] nrSpectrogramData = sonogram.Data;

            // 5) A FALSE-COLOUR VERSION OF DECIBEL SPECTROGRAM
            ridgeThreshold = 2.5;
            matrix         = ImageTools.WienerFilter(dbSpectrogramData, 3);
            hits           = RidgeDetection.Sobel5X5RidgeDetectionExperiment(matrix, ridgeThreshold);

            image = SpectrogramTools.CreateFalseColourDecibelSpectrogram(dbSpectrogramData, nrSpectrogramData, hits);
            image = sonogram.GetImageAnnotatedWithLinearHerzScale(image, "DECIBEL SPECTROGRAM - Colour annotated");
            list.Add(image);

            // 6) COMBINE THE SPECTROGRAM IMAGES
            Image    compositeImage = ImageTools.CombineImagesVertically(list);
            FileInfo outputImage    = new FileInfo(Path.Combine(opDir.FullName, sourceName + ".5spectro.png"));

            compositeImage.Save(outputImage.FullName);
            result.SpectrogramFile = outputImage;

            // 7) Generate the FREQUENCY x OSCILLATIONS Graphs and csv data
            //bool saveData = true;
            //bool saveImage = true;
            //double[] oscillationsSpectrum = Oscillations2014.GenerateOscillationDataAndImages(sourceRecording, configDict, saveData, saveImage);
            return(result);
        }
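
A minimal usage sketch for the method above. The paths are placeholders and, in practice, the config dictionary would also need whatever frame-size and sample-rate keys SonogramConfig(configDict) expects; only the recording-file-name key is shown because the method reads it directly.

        var sourceRecording = new FileInfo(@"C:\temp\recording.wav");
        var outputDirectory = new DirectoryInfo(@"C:\temp\output");
        var configDict = new Dictionary<string, string>
        {
            [ConfigKeys.Recording.Key_RecordingFileName] = sourceRecording.Name,
        };

        AudioToSonogramResult result = GenerateSpectrogramImages(sourceRecording, configDict, outputDirectory);
        LoggedConsole.WriteLine("Composite spectrogram written to: " + result.SpectrogramFile.FullName);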
Code Example #14
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            string       speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string       abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";
            const int    frameSize     = 256;
            const double windowOverlap = 0.0;

            double noiseReductionParameter = configuration.GetDoubleOrNull("SeverityOfNoiseRemoval") ?? 2.0;

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // duration of DCT in seconds
            //double dctDuration = (double)configuration[AnalysisKeys.DctDuration];

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min decibel value for an acceptable event
            double decibelThreshold = configuration.GetDouble(AnalysisKeys.DecibelThreshold);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName             = recording.BaseName,
                WindowSize              = frameSize,
                WindowOverlap           = windowOverlap,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = noiseReductionParameter,
            };

            var    recordingDuration = recording.Duration;
            int    sr           = recording.SampleRate;
            double freqBinWidth = sr / (double)sonoConfig.WindowSize;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth) + 1;

            // duration of DCT in seconds - want it to be several times (here 5X) the expected maximum period
            double framesPerSecond = freqBinWidth; // equals sr / frameSize because windowOverlap is 0.0
            double minPeriod       = 1 / (double)maxOscilFreq;
            double maxPeriod       = 1 / (double)minOscilFreq;
            double dctDuration     = 5 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);
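
            // Worked example (assumed oscillation bounds, for illustration only): with minOscilFreq = 1 Hz and
            // maxOscilFreq = 5 Hz, minPeriod = 0.2 s, maxPeriod = 1.0 s and dctDuration = 5.0 s; at sr = 22050,
            // frameSize = 256 and zero overlap, framesPerSecond ≈ 86.1, so dctLength ≈ 431 frames.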

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            // remove baseline from amplitude array
            var highPassFilteredSignal = DspFilters.SubtractBaseline(amplitudeArray, 7);

            // remove hi freq content from amplitude array
            var lowPassFilteredSignal = DataTools.filterMovingAverageOdd(amplitudeArray, 11);

            var       dctScores = new double[highPassFilteredSignal.Length];
            const int step      = 2;

            for (int i = dctLength; i < highPassFilteredSignal.Length - dctLength; i += step)
            {
                if (highPassFilteredSignal[i] < decibelThreshold)
                {
                    continue;
                }

                double[] subArray = DataTools.Subarray(highPassFilteredSignal, i, dctLength);

                // Look for oscillations in the highPassFilteredSignal
                Oscillations2014.GetOscillationUsingDct(subArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);
                bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                if (!periodWithinBounds)
                {
                    continue;
                }

                if (intensity < dctThreshold)
                {
                    continue;
                }

                //lay down score for sample length
                for (int j = 0; j < dctLength; j++)
                {
                    if (dctScores[i + j] < intensity && lowPassFilteredSignal[i + j] > decibelThreshold)
                    {
                        dctScores[i + j] = intensity;
                    }
                }
            }

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                dctScores,
                minHz,
                maxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                eventThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // ######################################################################
            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
            });

            var plot  = new Plot(this.DisplayName, dctScores, eventThreshold);
            var plots = new List <Plot> {
                plot
            };

            // DEBUG IMAGE this recognizer only. MUST set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeArray, decibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var ampltdPlot = new Plot("amplitude", normalisedScores, normalisedThreshold);
                DataTools.Normalise(highPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var demeanedPlot = new Plot("Hi Pass", normalisedScores, normalisedThreshold);

                DataTools.Normalise(lowPassFilteredSignal, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var lowPassPlot = new Plot("Low Pass", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    ampltdPlot, lowPassPlot, demeanedPlot, plot
                };
                Image debugImage = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, debugPlots, null);
                var   debugPath  = outputDirectory.Combine(FilenameHelpers.AnalysisResultName(Path.GetFileNameWithoutExtension(recording.BaseName), this.Identifier, "png", "DebugSpectrogram"));
                debugImage.Save(debugPath.FullName);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = null,
                Plots = plots,
                Events = acousticEvents,
            });
        }
Code Example #15
        /// <summary>
        /// Calculates the mean intensity in a freq band defined by its min and max freq.
        /// This method averages dB log values incorrectly, but it is faster than doing many log conversions.
        /// This method is used to find acoustic events and is accurate enough for the purpose.
        /// </summary>
        public static (List <AcousticEvent>, double[]) GetWhistles(
            SpectrogramStandard sonogram,
            int minHz,
            int maxHz,
            int nyquist,
            double decibelThreshold,
            double minDuration,
            double maxDuration,
            TimeSpan segmentStartOffset)
        {
            var sonogramData = sonogram.Data;
            int frameCount   = sonogramData.GetLength(0);
            int binCount     = sonogramData.GetLength(1);

            double binWidth = nyquist / (double)binCount;
            int    minBin   = (int)Math.Round(minHz / binWidth);
            int    maxBin   = (int)Math.Round(maxHz / binWidth);
            //int binCountInBand = maxBin - minBin + 1;

            // buffer zone around whistle is four bins wide.
            int N = 4;

            // list of accumulated acoustic events
            var events = new List <AcousticEvent>();
            var combinedIntensityArray = new double[frameCount];

            // for all frequency bins except top and bottom
            for (int bin = minBin + 1; bin < maxBin; bin++)
            {
                // set up an intensity array for the frequency bin.
                double[] intensity = new double[frameCount];

                if (minBin < N)
                {
                    // for all time frames in this frequency bin
                    for (int t = 0; t < frameCount; t++)
                    {
                        var bandIntensity        = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                        var topSideBandIntensity = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 3.0;
                        intensity[t] = bandIntensity - topSideBandIntensity;
                        intensity[t] = Math.Max(0.0, intensity[t]);
                    }
                }
                else
                {
                    // for all time frames in this frequency bin
                    for (int t = 0; t < frameCount; t++)
                    {
                        var bandIntensity           = (sonogramData[t, bin - 1] + sonogramData[t, bin] + sonogramData[t, bin + 1]) / 3.0;
                        var topSideBandIntensity    = (sonogramData[t, bin + 3] + sonogramData[t, bin + 4] + sonogramData[t, bin + 5]) / 6.0;
                        var bottomSideBandIntensity = (sonogramData[t, bin - 3] + sonogramData[t, bin - 4] + sonogramData[t, bin - 5]) / 6.0;
                        intensity[t] = bandIntensity - topSideBandIntensity - bottomSideBandIntensity;
                        intensity[t] = Math.Max(0.0, intensity[t]);
                    }
                }

                // smooth the decibel array to allow for brief gaps.
                intensity = DataTools.filterMovingAverageOdd(intensity, 7);

                //calculate the Hertz bounds of the acoustic events for these freq bins
                int bottomHzBound = (int)Math.Floor(sonogram.FBinWidth * (bin - 1));
                int topHzBound    = (int)Math.Ceiling(sonogram.FBinWidth * (bin + 2));

                //extract the events based on length and threshold.
                // Note: This method does NOT do prior smoothing of the dB array.
                var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                    intensity,
                    bottomHzBound,
                    topHzBound,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    decibelThreshold,
                    minDuration,
                    maxDuration,
                    segmentStartOffset);

                // add to combined intensity array
                for (int t = 0; t < frameCount; t++)
                {
                    //combinedIntensityArray[t] += intensity[t];
                    combinedIntensityArray[t] = Math.Max(intensity[t], combinedIntensityArray[t]);
                }

                // combine events
                events.AddRange(acousticEvents);
            } //end for all freq bins

            // combine adjacent acoustic events
            //events = AcousticEvent.CombineOverlappingEvents(events);

            return(events, combinedIntensityArray);
        }
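
The loop above scores each frequency bin by subtracting the mean energy of the sidebands from the mean energy of the three-bin band centred on the current bin. The following is a minimal, self-contained sketch of that per-frame calculation for the branch that has both an upper and a lower sideband; the spectrum values are hypothetical and it is not part of the library code above.

using System;

// A per-frame version of the band-minus-sidebands score used in the loop above
// (mirrors the else branch, which subtracts both sidebands at half weight).
static double BandMinusSidebands(double[] spectrum, int bin)
{
    double band   = (spectrum[bin - 1] + spectrum[bin] + spectrum[bin + 1]) / 3.0;
    double top    = (spectrum[bin + 3] + spectrum[bin + 4] + spectrum[bin + 5]) / 6.0;
    double bottom = (spectrum[bin - 3] + spectrum[bin - 4] + spectrum[bin - 5]) / 6.0;
    return Math.Max(0.0, band - top - bottom);
}

// Hypothetical decibel spectrum for one time frame: a narrow peak centred on bin 6.
double[] frame = { 3, 3, 2, 3, 2, 9, 12, 10, 3, 2, 3, 2, 3 };
Console.WriteLine(BandMinusSidebands(frame, 6));   // prints approximately 7.83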
コード例 #16
0
        public static void Execute(Arguments arguments)
        {
            if (arguments == null)
            {
                arguments = Dev();
            }

            LoggedConsole.WriteLine("DATE AND TIME:" + DateTime.Now);
            LoggedConsole.WriteLine("Syntactic Pattern Recognition\n");
            //StringBuilder sb = new StringBuilder("DATE AND TIME:" + DateTime.Now + "\n");
            //sb.Append("SCAN ALL RECORDINGS IN A DIRECTORY USING HTK-RECOGNISER\n");

            Log.Verbosity = 1;

            FileInfo      recordingPath = arguments.Source;
            FileInfo      iniPath       = arguments.Config;
            DirectoryInfo outputDir     = arguments.Output;
            string        opFName       = "SPR-output.txt";
            string        opPath        = outputDir + opFName;

            Log.WriteIfVerbose("# Output folder =" + outputDir);

            // A: READ PARAMETER VALUES FROM INI FILE
            var config = new ConfigDictionary(iniPath);
            Dictionary <string, string> dict = config.GetTable();

            Dictionary <string, string> .KeyCollection keys = dict.Keys;

            string callName     = dict[key_CALL_NAME];
            double frameOverlap = Convert.ToDouble(dict[key_FRAME_OVERLAP]);
            //SPT PARAMETERS
            double intensityThreshold   = Convert.ToDouble(dict[key_SPT_INTENSITY_THRESHOLD]);
            int    smallLengthThreshold = Convert.ToInt32(dict[key_SPT_SMALL_LENGTH_THRESHOLD]);
            //WHIPBIRD PARAMETERS
            int    whistle_MinHz          = int.Parse(dict[key_WHISTLE_MIN_HZ]);
            int    whistle_MaxHz          = int.Parse(dict[key_WHISTLE_MAX_HZ]);
            double optimumWhistleDuration = double.Parse(dict[key_WHISTLE_DURATION]);   //optimum duration of whistle in seconds
            int    whip_MinHz             = (dict.ContainsKey(key_WHIP_MIN_HZ)) ? int.Parse(dict[key_WHIP_MIN_HZ]) : 0;
            int    whip_MaxHz             = (dict.ContainsKey(key_WHIP_MAX_HZ))   ? int.Parse(dict[key_WHIP_MAX_HZ])    : 0;
            double whipDuration           = (dict.ContainsKey(key_WHIP_DURATION)) ? double.Parse(dict[key_WHIP_DURATION]) : 0.0; //duration of whip in seconds
            //CURLEW PARAMETERS
            double minDuration = (dict.ContainsKey(key_MIN_DURATION)) ? double.Parse(dict[key_MIN_DURATION]) : 0.0;              //min duration of call in seconds
            double maxDuration = (dict.ContainsKey(key_MAX_DURATION)) ? double.Parse(dict[key_MAX_DURATION]) : 0.0;              //duration of call in seconds

            double eventThreshold = double.Parse(dict[key_EVENT_THRESHOLD]);                                                     //min score for an acceptable event
            int    DRAW_SONOGRAMS = Convert.ToInt16(dict[key_DRAW_SONOGRAMS]);

            // B: CHECK to see if conversion from .MP3 to .WAV is necessary
            var destinationAudioFile = recordingPath;

            //LOAD RECORDING AND MAKE SONOGRAM
            BaseSonogram sonogram = null;

            using (var recording = new AudioRecording(destinationAudioFile.FullName))
            {
                // if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE

                var sonoConfig = new SonogramConfig
                {
                    NoiseReductionType = NoiseReductionType.None,
                    //NoiseReductionType = NoiseReductionType.STANDARD,
                    WindowOverlap = frameOverlap,
                };
                sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            }

            List <AcousticEvent> predictedEvents = null;

            double[,] hits = null;
            double[] scores = null;

            var audioFileName = Path.GetFileNameWithoutExtension(destinationAudioFile.FullName);

            if (callName.Equals("WHIPBIRD"))
            {
                //SPT
                var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);
                //SPR
                Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
                int    slope       = 0;   //degrees of the circle. i.e. 90 = vertical line.
                double sensitivity = 0.7; //lower value = more sensitive
                var    mHori       = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
                slope       = 87;         //84
                sensitivity = 0.8;        //lower value = more sensitive
                var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 4, intensityThreshold + 1, sensitivity);
                Log.WriteLine("SPR finished");
                Log.WriteLine("Extract Whipbird calls - start");

                int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
                int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
                int whistleFrames    = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
                int minBound_Whip    = (int)(whip_MinHz / sonogram.FBinWidth);
                int maxBound_Whip    = (int)(whip_MaxHz / sonogram.FBinWidth);
                int whipFrames       = (int)(sonogram.FramesPerSecond * whipDuration); //86 = frames/sec.
                var result3          = DetectWhipBird(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, minBound_Whip, maxBound_Whip, whipFrames, smallLengthThreshold);
                scores = result3.Item1;
                hits   = DataTools.AddMatrices(mHori, mVert);

                predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                    scores,
                    whip_MinHz,
                    whip_MaxHz,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    eventThreshold,
                    minDuration,
                    maxDuration,
                    TimeSpan.Zero);
                foreach (AcousticEvent ev in predictedEvents)
                {
                    ev.FileName = audioFileName;
                    ev.Name     = callName;
                }

                sonogram.Data = result1.Item1;
                Log.WriteLine("Extract Whipbird calls - finished");
            }
            else if (callName.Equals("CURLEW"))
            {
                //SPT
                double backgroundThreshold = 4.0;
                var    result1             = SNR.NoiseReduce(sonogram.Data, NoiseReductionType.Standard, backgroundThreshold);
                //var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);
                //var result1 = doNoiseRemoval(sonogram, intensityThreshold, smallLengthThreshold);

                //SPR
                Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
                int    slope       = 20;  //degrees of the circle. i.e. 90 = vertical line.
                double sensitivity = 0.8; //lower value = more sensitive
                var    mHori       = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
                slope       = 160;
                sensitivity = 0.8;        //lower value = more sensitive
                var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
                Log.WriteLine("SPR finished");

                //detect curlew calls
                int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
                int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
                int whistleFrames    = (int)(sonogram.FramesPerSecond * optimumWhistleDuration);
                var result3          = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames, smallLengthThreshold);

                //process curlew scores - look for curlew characteristic periodicity
                double minPeriod        = 1.2;
                double maxPeriod        = 1.8;
                int    minPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * minPeriod);
                int    maxPeriod_frames = (int)Math.Round(sonogram.FramesPerSecond * maxPeriod);
                scores = DataTools.filterMovingAverage(result3.Item1, 21);
                scores = DataTools.PeriodicityDetection(scores, minPeriod_frames, maxPeriod_frames);

                //extract events
                predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                    scores,
                    whistle_MinHz,
                    whistle_MaxHz,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    eventThreshold,
                    minDuration,
                    maxDuration,
                    TimeSpan.Zero);
                foreach (AcousticEvent ev in predictedEvents)
                {
                    ev.FileName = audioFileName;
                    ev.Name     = callName;
                }

                hits          = DataTools.AddMatrices(mHori, mVert);
                sonogram.Data = result1.Item1;
                Log.WriteLine("Extract Curlew calls - finished");
            }
            else if (callName.Equals("CURRAWONG"))
            {
                //SPT
                var result1 = SPT.doSPT(sonogram, intensityThreshold, smallLengthThreshold);
                //SPR
                Log.WriteLine("SPR start: intensity threshold = " + intensityThreshold);
                int slope = 70;           //degrees of the circle. i.e. 90 = vertical line.
                //slope = 210;
                double sensitivity = 0.7; //lower value = more sensitive
                var    mHori       = MarkLine(result1.Item1, slope, smallLengthThreshold, intensityThreshold, sensitivity);
                slope = 110;
                //slope = 340;
                sensitivity = 0.7;        //lower value = more sensitive
                var mVert = MarkLine(result1.Item1, slope, smallLengthThreshold - 3, intensityThreshold + 1, sensitivity);
                Log.WriteLine("SPR finished");

                int minBound_Whistle = (int)(whistle_MinHz / sonogram.FBinWidth);
                int maxBound_Whistle = (int)(whistle_MaxHz / sonogram.FBinWidth);
                int whistleFrames    = (int)(sonogram.FramesPerSecond * optimumWhistleDuration); //86 = frames/sec.
                var result3          = DetectCurlew(mHori, mVert, minBound_Whistle, maxBound_Whistle, whistleFrames + 10, smallLengthThreshold);
                scores = result3.Item1;
                hits   = DataTools.AddMatrices(mHori, mVert);

                predictedEvents = AcousticEvent.ConvertIntensityArray2Events(
                    scores,
                    TimeSpan.Zero,
                    whistle_MinHz,
                    whistle_MaxHz,
                    sonogram.FramesPerSecond,
                    sonogram.FBinWidth,
                    eventThreshold,
                    0.5,
                    maxDuration);
                foreach (AcousticEvent ev in predictedEvents)
                {
                    ev.FileName = audioFileName;
                    //ev.Name = callName;
                }
            }

            //write event count to results file.
            double sigDuration = sonogram.Duration.TotalSeconds;
            //string fname = Path.GetFileName(recordingPath);
            int count = predictedEvents.Count;

            Log.WriteIfVerbose("Number of Events: " + count);
            string str = string.Format("{0}\t{1}\t{2}", callName, sigDuration, count);

            FileTools.WriteTextFile(opPath, AcousticEvent.WriteEvents(predictedEvents, str).ToString());

            // SAVE IMAGE
            string imageName = outputDir + audioFileName;
            string imagePath = imageName + ".png";

            if (File.Exists(imagePath))
            {
                int suffix = 1;
                while (File.Exists(imageName + "." + suffix.ToString() + ".png"))
                {
                    suffix++;
                }
                //{
                //    suffix = (suffix == string.Empty) ? "1" : (int.Parse(suffix) + 1).ToString();
                //}
                //File.Delete(outputDir + audioFileName + "." + suffix.ToString() + ".png");
                File.Move(imagePath, imageName + "." + suffix.ToString() + ".png");
            }
            //string newPath = imagePath + suffix + ".png";
            if (DRAW_SONOGRAMS == 2)
            {
                DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
            }
            else if (DRAW_SONOGRAMS == 1 && predictedEvents.Count > 0)
            {
                DrawSonogram(sonogram, imagePath, hits, scores, predictedEvents, eventThreshold);
            }

            Log.WriteIfVerbose("Image saved to: " + imagePath);
            //string savePath = outputDir + Path.GetFileNameWithoutExtension(recordingPath);
            //string suffix = string.Empty;
            //Image im = sonogram.GetImage(false, false);
            //string newPath = savePath + suffix + ".jpg";
            //im.Save(newPath);

            LoggedConsole.WriteLine("\nFINISHED RECORDING!");
            Console.ReadLine();
        }
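
A hedged usage sketch for the Execute entry point above. It assumes the Arguments type exposes settable Source, Config and Output members (the three values the method reads) and that Execute is called from inside, or qualified with, its containing class; all file paths are placeholders.

using System.IO;

// Placeholder paths only; the ini file must define the keys read at the top of Execute
// (CALL_NAME, FRAME_OVERLAP, the SPT thresholds, the event threshold, and so on).
var arguments = new Arguments
{
    Source = new FileInfo(@"C:\recordings\whipbird_sample.wav"),
    Config = new FileInfo(@"C:\config\SPR_WHIPBIRD.ini"),
    Output = new DirectoryInfo(@"C:\output\"),
};

Execute(arguments);   // writes SPR-output.txt and a .png sonogram into the output folder, then blocks on Console.ReadLine()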
コード例 #17
0
        public static (List <AcousticEvent>, double[], double[]) GetComponentsWithHarmonics(
            SpectrogramStandard spectrogram,
            int minHz,
            int maxHz,
            int nyquist,
            double decibelThreshold,
            double dctThreshold,
            double minDuration,
            double maxDuration,
            int minFormantGap,
            int maxFormantGap,
            TimeSpan segmentStartOffset)
        {
            // Event threshold - Determines FP / FN trade-off for events.
            //double eventThreshold = 0.2;

            var sonogramData = spectrogram.Data;
            int frameCount   = sonogramData.GetLength(0);
            int binCount     = sonogramData.GetLength(1);

            double freqBinWidth = nyquist / (double)binCount;
            int    minBin       = (int)Math.Round(minHz / freqBinWidth);
            int    maxBin       = (int)Math.Round(maxHz / freqBinWidth);

            // extract the sub-band
            double[,] subMatrix = MatrixTools.Submatrix(spectrogram.Data, 0, minBin, frameCount - 1, maxBin);

            //ii: DETECT HARMONICS
            // now look for harmonics in search band using the Xcorrelation-DCT method.
            var results = CrossCorrelation.DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold);

            // set up score arrays
            double[] dBArray = results.Item1;
            double[] harmonicIntensityScores = results.Item2; //an array of formant intensity
            int[]    maxIndexArray           = results.Item3;

            for (int r = 0; r < frameCount; r++)
            {
                if (harmonicIntensityScores[r] < dctThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                int    maxId        = maxIndexArray[r];
                int    bandBinCount = maxBin - minBin + 1;
                double freqBinGap   = 2 * bandBinCount / (double)maxId;
                double formantGap   = freqBinGap * freqBinWidth;
                if (formantGap < minFormantGap || formantGap > maxFormantGap)
                {
                    harmonicIntensityScores[r] = 0.0;
                }
            }

            // smooth the harmonicIntensityScores array to allow for brief gaps.
            harmonicIntensityScores = DataTools.filterMovingAverageOdd(harmonicIntensityScores, 3);

            //extract the events based on length and threshold.
            // Note: This method does NOT do prior smoothing of the score array.
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                harmonicIntensityScores,
                minHz,
                maxHz,
                spectrogram.FramesPerSecond,
                spectrogram.FBinWidth,
                dctThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            // add in temporary names to the events
            // These can be altered later.
            foreach (var ev in acousticEvents)
            {
                ev.SpeciesName = "NoName";
                ev.Name        = "Harmonics";
            }

            return(acousticEvents, dBArray, harmonicIntensityScores);
        }
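
The formant-gap filter above converts the index of the dominant peak for each frame (maxIndexArray[r]) into a gap in Hertz via formantGap = 2 * bandBinCount / maxId * freqBinWidth. A small worked sketch of that arithmetic with assumed numbers (22050 Hz sample rate, 512 spectrogram bins, 500-3500 Hz search band); none of these values come from the code above.

using System;

double nyquist      = 11025;                               // assumes a 22050 Hz sample rate
int    binCount     = 512;                                 // assumed spectrogram column count
double freqBinWidth = nyquist / binCount;                  // ≈ 21.5 Hz per bin

int minBin       = (int)Math.Round(500 / freqBinWidth);    // ≈ 23
int maxBin       = (int)Math.Round(3500 / freqBinWidth);   // ≈ 163
int bandBinCount = maxBin - minBin + 1;                    // ≈ 141

int    maxId      = 12;                                    // suppose this is maxIndexArray[r] for one frame
double freqBinGap = 2 * bandBinCount / (double)maxId;      // 23.5 bins between adjacent harmonics
double formantGap = freqBinGap * freqBinWidth;             // ≈ 506 Hz

// Frames whose formantGap falls outside [minFormantGap, maxFormantGap] have their score zeroed above.
Console.WriteLine(formantGap);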
コード例 #18
0
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="sonoConfig"></param>
        /// <param name="lbConfig"></param>
        /// <param name="drawDebugImage"></param>
        /// <param name="segmentStartOffset"></param>
        /// <returns></returns>
        public static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaBicolorConfig lbConfig,
            bool drawDebugImage,
            TimeSpan segmentStartOffset)
        {
            double decibelThreshold   = lbConfig.DecibelThreshold; //dB
            double intensityThreshold = lbConfig.IntensityThreshold;

            //double eventThreshold = lbConfig.EventThreshold; //in 0-1

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth; // NB: equals freqBinWidth only when the frame advance equals the window size (zero overlap)

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 3 * lbConfig.MaxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lbConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lbConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lbConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lbConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.PreEmphasis(amplitudeScores, 1.0);

            // Could smooth here rather than above. Above seemed slightly better?
            amplitudeScores  = DataTools.filterMovingAverage(amplitudeScores, 7);
            differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            var predictedEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lbConfig.LowerBandMinHz,
                lbConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                lbConfig.MinDuration,
                lbConfig.MaxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // init the score array
            double[] scores = new double[rowCount];

            //iii: CONVERT SCORES TO ACOUSTIC EVENTS
            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedEvents)
            {
                //rowtop,  rowWidth
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    double   oscilFreq;
                    double   period;
                    double   intensity;
                    Oscillations2014.GetOscillation(differenceArray, framesPerSecond, cosines, out oscilFreq, out period, out intensity);

                    bool periodWithinBounds = period > lbConfig.MinPeriod && period < lbConfig.MaxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = "L.b";
                    ae.Score_MaxInEvent = maximumIntensity;
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################

            // calculate the cosine similarity scores
            var scorePlot = new Plot(lbConfig.SpeciesName, scores, intensityThreshold);

            //DEBUG IMAGE for this recognizer only. MUST be set to false for deployment.
            Image debugImage = null;

            if (drawDebugImage)
            {
                // display a variety of debug score arrays
                double[] normalisedScores;
                double   normalisedThreshold;

                //DataTools.Normalise(scores, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var debugPlot = new Plot("Score", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(upperArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var upperPlot = new Plot("Upper", normalisedScores, normalisedThreshold);
                //DataTools.Normalise(lowerArray, eventDecibelThreshold, out normalisedScores, out normalisedThreshold);
                //var lowerPlot = new Plot("Lower", normalisedScores, normalisedThreshold);
                DataTools.Normalise(amplitudeScores, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var sumDiffPlot = new Plot("SumMinusDifference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, 3.0, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Difference", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };

                // other debug plots
                //var debugPlots = new List<Plot> { scorePlot, upperPlot, lowerPlot, sumDiffPlot, differencePlot };
                debugImage = DisplayDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return a new sonogram because it makes for easier interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if noise reduction is not used, the recogniser can be more sensitive.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
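
The oscillation scan above sizes its DCT from the frame rate and the expected call period (dctDuration = 3 * MaxPeriod; dctLength = round(framesPerSecond * dctDuration)). A minimal numeric sketch of that sizing; the values are illustrative and not taken from any L. bicolor configuration.

using System;

double framesPerSecond = 86.13;   // e.g. 22050 Hz sample rate with a 256-sample frame advance
double maxPeriod       = 0.4;     // longest expected oscillation period, in seconds

double dctDuration = 3 * maxPeriod;                                     // 1.2 s of signal per DCT window
int    dctLength   = (int)Math.Round(framesPerSecond * dctDuration);    // ≈ 103 frames

// The DCT window therefore spans roughly three full periods of the slowest expected
// oscillation, which is what the per-event scan above needs in order to resolve it.
Console.WriteLine(dctLength);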
コード例 #19
0
        /// <summary>
        /// ################ THE KEY ANALYSIS METHOD for TRILLS
        ///
        ///  See Anthony's ExempliGratia.Recognize() method in order to see how to use methods for config profiles.
        ///  </summary>
        /// <param name="recording"></param>
        /// <param name="sonoConfig"></param>
        /// <param name="lwConfig"></param>
        /// <param name="returnDebugImage"></param>
        /// <param name="segmentStartOffset"></param>
        /// <returns></returns>
        private static Tuple <BaseSonogram, double[, ], double[], List <AcousticEvent>, Image> Analysis(
            AudioRecording recording,
            SonogramConfig sonoConfig,
            LitoriaWatjulumConfig lwConfig,
            bool returnDebugImage,
            TimeSpan segmentStartOffset)
        {
            double intensityThreshold = lwConfig.IntensityThreshold;
            double minDuration        = lwConfig.MinDurationOfTrill; // seconds
            double maxDuration        = lwConfig.MaxDurationOfTrill; // seconds
            double minPeriod          = lwConfig.MinPeriod;          // seconds
            double maxPeriod          = lwConfig.MaxPeriod;          // seconds

            if (recording == null)
            {
                LoggedConsole.WriteLine("AudioRecording == null. Analysis not possible.");
                return(null);
            }

            //i: MAKE SONOGRAM
            //TimeSpan tsRecordingtDuration = recording.Duration();
            int    sr              = recording.SampleRate;
            double freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double framesPerSecond = freqBinWidth; // NB: equals freqBinWidth only when the frame advance equals the window size (zero overlap)

            // duration of DCT in seconds - want it to be about 3X or 4X the expected maximum period
            double dctDuration = 4 * maxPeriod;

            // duration of DCT in frames
            int dctLength = (int)Math.Round(framesPerSecond * dctDuration);

            // set up the cosine coefficients
            double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength);

            int upperBandMinBin = (int)Math.Round(lwConfig.UpperBandMinHz / freqBinWidth) + 1;
            int upperBandMaxBin = (int)Math.Round(lwConfig.UpperBandMaxHz / freqBinWidth) + 1;
            int lowerBandMinBin = (int)Math.Round(lwConfig.LowerBandMinHz / freqBinWidth) + 1;
            int lowerBandMaxBin = (int)Math.Round(lwConfig.LowerBandMaxHz / freqBinWidth) + 1;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);

            //int colCount = sonogram.Data.GetLength(1);

            double[] lowerArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, lowerBandMinBin, rowCount - 1, lowerBandMaxBin);
            double[] upperArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, upperBandMinBin, rowCount - 1, upperBandMaxBin);

            //lowerArray = DataTools.filterMovingAverage(lowerArray, 3);
            //upperArray = DataTools.filterMovingAverage(upperArray, 3);

            double[] amplitudeScores  = DataTools.SumMinusDifference(lowerArray, upperArray);
            double[] differenceScores = DspFilters.SubtractBaseline(amplitudeScores, 7);

            // Could smooth here rather than above. Above seemed slightly better?
            //amplitudeScores = DataTools.filterMovingAverage(amplitudeScores, 7);
            //differenceScores = DataTools.filterMovingAverage(differenceScores, 7);

            //iii: CONVERT decibel sum-diff SCORES TO ACOUSTIC TRILL EVENTS
            var predictedTrillEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                lwConfig.DecibelThreshold,
                minDuration,
                maxDuration,
                segmentStartOffset);

            for (int i = 0; i < differenceScores.Length; i++)
            {
                if (differenceScores[i] < 1.0)
                {
                    differenceScores[i] = 0.0;
                }
            }

            // LOOK FOR TRILL EVENTS
            // init the score array
            double[] scores = new double[rowCount];

            // var hits = new double[rowCount, colCount];
            double[,] hits = null;

            // init confirmed events
            var confirmedEvents = new List <AcousticEvent>();

            // add names into the returned events
            foreach (var ae in predictedTrillEvents)
            {
                int    eventStart       = ae.Oblong.RowTop;
                int    eventWidth       = ae.Oblong.RowWidth;
                int    step             = 2;
                double maximumIntensity = 0.0;

                // scan the event to get oscillation period and intensity
                for (int i = eventStart - (dctLength / 2); i < eventStart + eventWidth - (dctLength / 2); i += step)
                {
                    // Look for oscillations in the difference array
                    double[] differenceArray = DataTools.Subarray(differenceScores, i, dctLength);
                    Oscillations2014.GetOscillationUsingDct(differenceArray, framesPerSecond, cosines, out var oscilFreq, out var period, out var intensity);

                    bool periodWithinBounds = period > minPeriod && period < maxPeriod;

                    //Console.WriteLine($"step={i}    period={period:f4}");

                    if (!periodWithinBounds)
                    {
                        continue;
                    }

                    for (int j = 0; j < dctLength; j++) //lay down score for sample length
                    {
                        if (scores[i + j] < intensity)
                        {
                            scores[i + j] = intensity;
                        }
                    }

                    if (maximumIntensity < intensity)
                    {
                        maximumIntensity = intensity;
                    }
                }

                // add abbreviatedSpeciesName into event
                if (maximumIntensity >= intensityThreshold)
                {
                    ae.Name             = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[0]}";
                    ae.Score_MaxInEvent = maximumIntensity;
                    ae.Profile          = lwConfig.ProfileNames[0];
                    confirmedEvents.Add(ae);
                }
            }

            //######################################################################
            // LOOK FOR TINK EVENTS
            // CONVERT decibel sum-diff SCORES TO ACOUSTIC EVENTS
            double minDurationOfTink = lwConfig.MinDurationOfTink;  // seconds
            double maxDurationOfTink = lwConfig.MaxDurationOfTink;  // seconds

            // a stronger threshold is used for the tink because the call is brief.
            double tinkDecibelThreshold = lwConfig.DecibelThreshold + 3.0;
            var    predictedTinkEvents  = AcousticEvent.ConvertScoreArray2Events(
                amplitudeScores,
                lwConfig.LowerBandMinHz,
                lwConfig.UpperBandMaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                tinkDecibelThreshold,
                minDurationOfTink,
                maxDurationOfTink,
                segmentStartOffset);

            foreach (var ae2 in predictedTinkEvents)
            {
                // Prune the list of potential acoustic events, for example using Cosine Similarity.

                //rowtop,  rowWidth
                //int eventStart = ae2.Oblong.RowTop;
                //int eventWidth = ae2.Oblong.RowWidth;
                //int step = 2;
                //double maximumIntensity = 0.0;

                // add abbreviatedSpeciesName into event
                //if (maximumIntensity >= intensityThreshold)
                //{
                ae2.Name = $"{lwConfig.AbbreviatedSpeciesName}.{lwConfig.ProfileNames[1]}";

                //ae2.Score_MaxInEvent = maximumIntensity;
                ae2.Profile = lwConfig.ProfileNames[1];
                confirmedEvents.Add(ae2);

                //}
            }

            //######################################################################

            var   scorePlot  = new Plot(lwConfig.SpeciesName, scores, intensityThreshold);
            Image debugImage = null;

            if (returnDebugImage)
            {
                // display a variety of debug score arrays
                DataTools.Normalise(amplitudeScores, lwConfig.DecibelThreshold, out var normalisedScores, out var normalisedThreshold);
                var sumDiffPlot = new Plot("Sum Minus Difference", normalisedScores, normalisedThreshold);
                DataTools.Normalise(differenceScores, lwConfig.DecibelThreshold, out normalisedScores, out normalisedThreshold);
                var differencePlot = new Plot("Baseline Removed", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    scorePlot, sumDiffPlot, differencePlot
                };
                debugImage = DrawDebugImage(sonogram, confirmedEvents, debugPlots, hits);
            }

            // return a new sonogram because it makes for easier interpretation of the image
            var returnSonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = 512,
                WindowOverlap = 0,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if noise reduction is not used, the recogniser can be more sensitive.
                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            };
            BaseSonogram returnSonogram = new SpectrogramStandard(returnSonoConfig, recording.WavReader);

            return(Tuple.Create(returnSonogram, hits, scores, confirmedEvents, debugImage));
        } //Analysis()
コード例 #20
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

            // duration of DCT in seconds
            double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration);

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            // The default was 512 for Canetoad.
            // Framesize = 128 seems to work for Litoria fallax.
            const int FrameSize     = 128;
            double    windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
                recording.SampleRate,
                FrameSize,
                maxOscilFreq);

            //windowOverlap = 0.75; // previous default

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = FrameSize,
                WindowOverlap = windowOverlap,

                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.1,
            };

            // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD");
            TimeSpan     recordingDuration = recording.Duration;
            int          sr           = recording.SampleRate;
            double       freqBinWidth = sr / (double)sonoConfig.WindowSize;
            BaseSonogram sonogram     = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount     = sonogram.Data.GetLength(0);
            int          colCount     = sonogram.Data.GetLength(1);

            // double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, (rowCount - 1), maxbin);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            // This window is used to smooth the score array before extracting events.
            // A short window (e.g. 3) preserves sharper score edges to define events but also keeps noise.
            int scoreSmoothingWindow = 13;

            double[]             scores; // predefinition of score array
            List <AcousticEvent> acousticEvents;

            double[,] hits;
            Oscillations2012.Execute(
                (SpectrogramStandard)sonogram,
                minHz,
                maxHz,
                dctDuration,
                minOscilFreq,
                maxOscilFreq,
                dctThreshold,
                eventThreshold,
                minDuration,
                maxDuration,
                scoreSmoothingWindow,
                out scores,
                out acousticEvents,
                out hits,
                segmentStartOffset);

            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
            });

            var plot  = new Plot(this.DisplayName, scores, eventThreshold);
            var plots = new List <Plot> {
                plot
            };

            this.WriteDebugImage(recording, outputDirectory, sonogram, acousticEvents, plots, hits);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plots,
                Events = acousticEvents,
            });
        }
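
CalculateRequiredFrameOverlap is used above to pick a window overlap that keeps the frame rate high enough for the fastest expected oscillation. Its implementation is not shown here; the sketch below only illustrates the sampling argument it presumably encodes, with assumed numbers, and should not be read as the library's actual formula.

using System;

// Assumption: to track an oscillation of maxOscilFreq Hz, the frame rate must be at least
// about twice that frequency (a Nyquist-style argument). Illustrative values only.
int sampleRate   = 22050;
int frameSize    = 128;
int maxOscilFreq = 100;

double requiredFramesPerSecond = 2.0 * maxOscilFreq;                       // 200 frames per second
double frameAdvanceSamples     = sampleRate / requiredFramesPerSecond;     // ≈ 110 samples per step
double windowOverlap           = 1.0 - (frameAdvanceSamples / frameSize);  // ≈ 0.14

Console.WriteLine(windowOverlap);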
コード例 #21
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy<IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int? imageWidth)
        {
            // common properties
            var speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            var abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

            // duration of DCT in seconds
            double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration);

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            // this default framesize seems to work for Canetoad
            const int frameSize     = 512;
            double    windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
                recording.SampleRate,
                frameSize,
                maxOscilFreq);

            //windowOverlap = 0.75; // previous default

            // DEBUG: Following line used to search for where indeterminism creeps into the spectrogram values which vary from run to run.
            //FileTools.AddArrayAdjacentToExistingArrays(Path.Combine(outputDirectory.FullName, recording.BaseName+"_RecordingSamples.csv"), recording.WavReader.GetChannel(0));

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // the default window is HAMMING
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),
                // if noise reduction is not used, the recogniser can be more sensitive.
                NoiseReductionType = NoiseReductionType.None,
            };

            // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD");
            TimeSpan recordingDuration = recording.Duration;

            //int sr = recording.SampleRate;
            //double freqBinWidth = sr / (double)sonoConfig.WindowSize;

            /* #############################################################################################################################################
             * window    sr          frameDuration   frames/sec  hz/bin  64frameDuration hz/64bins       hz/128bins
             * 1024     22050       46.4ms          21.5        21.5    2944ms          1376hz          2752hz
             * 1024     17640       58.0ms          17.2        17.2    3715ms          1100hz          2200hz
             * 2048     17640       116.1ms          8.6         8.6    7430ms           551hz          1100hz
             */

            // int minBin = (int)Math.Round(minHz / freqBinWidth) + 1;
            // int maxbin = minBin + numberOfBins - 1;
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            //int rowCount = sonogram.Data.GetLength(0);
            //int colCount = sonogram.Data.GetLength(1);

            // DEBUG: Following lines used to search for where indeterminism creeps into the spectrogram values which vary from run to run.
            //double[] array = DataTools.Matrix2Array(sonogram.Data);
            //FileTools.AddArrayAdjacentToExistingArrays(Path.Combine(outputDirectory.FullName, recording.BaseName+".csv"), array);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            double minDurationOfAdvertCall = minDuration; // this boundary duration should = 5.0 seconds as of 4 June 2015.

            //double minDurationOfReleaseCall = 1.0;
            double[]             scores; // predefinition of score array
            List <AcousticEvent> events;

            double[,] hits;
            Oscillations2012.Execute(
                (SpectrogramStandard)sonogram,
                minHz,
                maxHz,
                dctDuration,
                minOscilFreq,
                maxOscilFreq,
                dctThreshold,
                eventThreshold,
                minDurationOfAdvertCall,
                maxDuration,
                out scores,
                out events,
                out hits,
                segmentStartOffset);

            // DEBUG: Following line used to search for where indeterminism creeps into the event detection
            //FileTools.AddArrayAdjacentToExistingArrays(Path.Combine(outputDirectory.FullName, recording.BaseName+"_ScoreArray.csv"), scores);

            var prunedEvents = new List <AcousticEvent>();

            foreach (AcousticEvent ae in events)
            {
                //if (ae.Duration < minDurationOfReleaseCall) { continue; }
                if (ae.EventDurationSeconds < minDurationOfAdvertCall)
                {
                    continue;
                }

                if (ae.EventDurationSeconds > maxDuration)
                {
                    continue;
                }

                // add additional info
                ae.SpeciesName            = speciesName;
                ae.Name                   = abbreviatedSpeciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                prunedEvents.Add(ae);

                //if (ae.Duration >= minDurationOfAdvertCall)
                //{
                //    ae.Name = abbreviatedSpeciesName; // + ".AdvertCall";
                //    prunedEvents.Add(ae);
                //    continue;
                //}
            }

            // do a recognizer test.
            if (false)
            {
                if (MainEntry.InDEBUG)
                {
                    RecognizerTest(scores, new FileInfo(recording.FilePath));
                    RecognizerTest(prunedEvents, new FileInfo(recording.FilePath));
                }
            }

            var plot = new Plot(this.DisplayName, scores, eventThreshold);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = prunedEvents,

                //Events = events
            });
        }
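
The pruning loop above keeps only events whose duration lies within [minDurationOfAdvertCall, maxDuration] and stamps species metadata on each survivor. The same filter can be written as a LINQ pipeline; the helper below is a sketch with equivalent keep/skip conditions, assuming the same AcousticEvent properties used in the loop.

using System.Collections.Generic;
using System.Linq;

// LINQ rendering of the pruning loop above: filter by duration, then stamp metadata.
static List<AcousticEvent> PruneAndLabel(
    IEnumerable<AcousticEvent> events,
    double minDurationSeconds,
    double maxDurationSeconds,
    string speciesName,
    string abbreviatedSpeciesName,
    double segmentDurationSeconds,
    double segmentStartSeconds)
{
    return events
        .Where(ae => ae.EventDurationSeconds >= minDurationSeconds
                  && ae.EventDurationSeconds <= maxDurationSeconds)
        .Select(ae =>
        {
            ae.SpeciesName            = speciesName;
            ae.Name                   = abbreviatedSpeciesName;
            ae.SegmentDurationSeconds = segmentDurationSeconds;
            ae.SegmentStartSeconds    = segmentStartSeconds;
            return ae;
        })
        .ToList();
}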