public AudioRecordingStopsAfterRecording(
     AudioRecording anAudioRecording,
     Microphone aMicrophone)
 {
     audioRecording = anAudioRecording;
     microphone = aMicrophone;
 }
 public AudioRecordingSucceedsWithMicrophone(
     AudioRecording anAudioRecording,
     Microphone aMicrophone)
 {
     audioRecording = anAudioRecording;
     microphone = aMicrophone;
 }
 public AudioRecordingFailsWithoutMicrophone(
     AudioRecording aPreviousAudioRecording,
     AudioRecording aNewAudioRecording,
     Microphone aMicrophone)
 {
     previousAudioRecording = aPreviousAudioRecording;
     audioRecording = aNewAudioRecording;
     microphone = aMicrophone;
 }
Example #4
        private void WriteDebugImage(
            AudioRecording recording,
            DirectoryInfo outputDirectory,
            BaseSonogram sonogram,
            List<AcousticEvent> acousticEvents,
            List<Plot> plots,
            double[,] hits)
        {
            // DEBUG IMAGE for this recogniser only. MUST be set false for deployment.
            bool displayDebugImage = MainEntry.InDEBUG;

            if (displayDebugImage)
            {
                Image debugImage1 = SpectrogramTools.GetSonogramPlusCharts(sonogram, acousticEvents, plots, hits);
                var   debugPath1  =
                    outputDirectory.Combine(
                        FilenameHelpers.AnalysisResultName(
                            Path.GetFileNameWithoutExtension(recording.BaseName),
                            this.Identifier,
                            "png",
                            "DebugSpectrogram1"));
                debugImage1.Save(debugPath1.FullName);

                // save new image with longer frame
                var sonoConfig2 = new SonogramConfig
                {
                    SourceFName        = recording.BaseName,
                    WindowSize         = 1024,
                    WindowOverlap      = 0,
                    NoiseReductionType = NoiseReductionType.None,

                    //NoiseReductionType = NoiseReductionType.STANDARD,
                    //NoiseReductionParameter = 0.1
                };
                BaseSonogram sonogram2 = new SpectrogramStandard(sonoConfig2, recording.WavReader);

                var debugPath2 =
                    outputDirectory.Combine(
                        FilenameHelpers.AnalysisResultName(
                            Path.GetFileNameWithoutExtension(recording.BaseName),
                            this.Identifier,
                            "png",
                            "DebugSpectrogram2"));
                Image debugImage2 = SpectrogramTools.GetSonogramPlusCharts(sonogram2, acousticEvents, plots, null);
                debugImage2.Save(debugPath2.FullName);
            }
        }
Example #5
        public static void Main(Arguments arguments)
        {
            // 1. set up the necessary files
            var           sourceRecording = arguments.Source;
            var           configInfo      = ConfigFile.Deserialize<SpectrogramGeneratorConfig>(arguments.Config.ToFileInfo());
            DirectoryInfo output          = arguments.Output;

            if (!output.Exists)
            {
                output.Create();
            }

            //if (arguments.StartOffset.HasValue ^ arguments.EndOffset.HasValue)
            //{
            //    throw new InvalidStartOrEndException("If StartOffset or EndOffset is specified, then both must be specified");
            //}
            // set default offsets - only use defaults if not provided in arguments list
            // var offsetsProvided = arguments.StartOffset.HasValue && arguments.EndOffset.HasValue;
            //TimeSpan? startOffset;
            //TimeSpan? endOffset;
            //if (offsetsProvided)
            //{
            //    startOffset = TimeSpan.FromSeconds(arguments.StartOffset.Value);
            //    endOffset = TimeSpan.FromSeconds(arguments.EndOffset.Value);
            //}

            const string title = "# MAKE MULTIPLE SONOGRAMS FROM AUDIO RECORDING";
            string       date  = "# DATE AND TIME: " + DateTime.Now;

            LoggedConsole.WriteLine(title);
            LoggedConsole.WriteLine(date);
            LoggedConsole.WriteLine("# Input  audio file: " + sourceRecording.Name);

            // 3: CREATE A TEMPORARY RECORDING
            int resampleRate     = configInfo.GetIntOrNull("ResampleRate") ?? 22050;
            var tempAudioSegment = AudioRecording.CreateTemporaryAudioFile(sourceRecording, output, resampleRate);

            // 4: GENERATE SPECTROGRAM images
            //string sourceName = sourceRecording.FullName;
            string sourceName = Path.GetFileNameWithoutExtension(sourceRecording.FullName);
            var    result     = SpectrogramGenerator.GenerateSpectrogramImages(tempAudioSegment, configInfo, sourceName);

            // 5: Save the image
            var outputImageFile = new FileInfo(Path.Combine(output.FullName, sourceName + ".Spectrograms.png"));

            result.CompositeImage.Save(outputImageFile.FullName);
        }
Example #6
        public static double[,] GetAmplitudeSpectrogramNoiseReduced(AudioRecording recording, int frameSize)
        {
            int frameStep = frameSize;

            // get amplitude spectrogram and remove the DC column ie column zero.
            var results = DSP_Frames.ExtractEnvelopeAndAmplSpectrogram(recording.WavReader.Samples, recording.SampleRate, recording.Epsilon, frameSize, frameStep);

            // remove background noise from the full amplitude spectrogram
            const double sdCount             = 0.1;
            const double spectralBgThreshold = 0.003;                                                                          // SPECTRAL AMPLITUDE THRESHOLD for smoothing background
            var          profile             = NoiseProfile.CalculateModalNoiseProfile(results.AmplitudeSpectrogram, sdCount); //calculate noise profile - assumes a dB spectrogram.

            double[] noiseValues          = DataTools.filterMovingAverage(profile.NoiseThresholds, 7);                         // smooth the noise profile
            var      amplitudeSpectrogram = SNR.NoiseReduce_Standard(results.AmplitudeSpectrogram, noiseValues, spectralBgThreshold);

            return amplitudeSpectrogram;
        }
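
A minimal calling sketch for the helper above, for context. The recording path and frame size are illustrative assumptions; the AudioRecording path constructor and the method signature come from the other snippets on this page.

        // Hypothetical usage: load a recording and build the noise-reduced
        // amplitude spectrogram with a 512-sample frame (values assumed).
        var recording = new AudioRecording(@"C:\recordings\example.wav");
        double[,] amplitudeSpectrogram = GetAmplitudeSpectrogramNoiseReduced(recording, 512);

        // rows are time frames, columns are frequency bins
        Console.WriteLine(amplitudeSpectrogram.GetLength(0) + " frames x " + amplitudeSpectrogram.GetLength(1) + " bins");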
Example #7
        public void TestBinaryClusteringOfSpectra()
        {
            //string wavFilePath = @"C:\SensorNetworks\WavFiles\TestRecordings\BAC\BAC2_20071008-085040.wav";
            var wavFilePath = PathHelper.ResolveAsset(@"Recordings", "BAC2_20071008-085040.wav");

            // var outputDir = this.outputDirectory;
            var outputDir = PathHelper.ResolveAssetPath("BinaryClustering"); // only use this to write expected output.

            int frameSize = 512;
            var recording = new AudioRecording(wavFilePath); // get recording segment

            // for deriving binary spectrogram
            double binaryThreshold = SpectralClustering.DefaultBinaryThresholdInDecibels; // A decibel threshold for post noise removal

            double[,] spectrogramData = SpectralClustering.GetDecibelSpectrogramNoiseReduced(recording, frameSize);

            // We only use the midband of the Spectrogram, i.e. the band between lowerBinBound and upperBinBound.
            // In June 2016, the mid-band was set to lowerBound=1000Hz, upperBound=8000Hz, because this band contains most bird activity, i.e. it is the Bird-Band
            // This was done in order to make the cluster summary indices more reflective of bird call activity.
            int    freqBinCount   = spectrogramData.GetLength(1);
            double binWidth       = recording.Nyquist / (double)freqBinCount;
            int    lowerFreqBound = 1000;
            int    lowerBinBound  = (int)Math.Ceiling(lowerFreqBound / binWidth);
            int    upperFreqBound = 8000;
            int    upperBinBound  = (int)Math.Ceiling(upperFreqBound / binWidth);

            var midBandSpectrogram = MatrixTools.Submatrix(spectrogramData, 0, lowerBinBound, spectrogramData.GetLength(0) - 1, upperBinBound);
            var clusterInfo        = SpectralClustering.ClusterTheSpectra(midBandSpectrogram, lowerBinBound, upperBinBound, binaryThreshold);

            // transfer cluster info to spectral index results
            var clusterSpectrum = SpectralClustering.RestoreFullLengthSpectrum(clusterInfo.ClusterSpectrum, freqBinCount, lowerBinBound);

            // test the cluster count - also called spectral diversity in some papers
            Assert.AreEqual(17, clusterInfo.ClusterCount);

            // test the trigram count - another way of thinking about spectral change
            Assert.AreEqual(342, clusterInfo.TriGramUniqueCount);

            // test what used to be the CLS spectral index. Sum of the rows of the weight vectors.
            var expectedSpectrumFile = new FileInfo(outputDir + "\\clusterSpectrum.bin");

            // Binary.Serialize(expectedSpectrumFile, clusterSpectrum);
            var expectedVector = Binary.Deserialize<double[]>(expectedSpectrumFile);

            CollectionAssert.AreEqual(expectedVector, clusterSpectrum);
        }
Example #8
        public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
        {
            var audioFile       = segmentSettings.SegmentAudioFile;
            var recording       = new AudioRecording(audioFile.FullName);
            var outputDirectory = segmentSettings.SegmentOutputDirectory;

            var    analysisResult = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration);
            Config configuration  = ConfigFile.Deserialize(analysisSettings.ConfigFile);

            bool saveCsv = analysisSettings.AnalysisDataSaveBehavior;

            if (configuration.GetBool(AnalysisKeys.MakeSoxSonogram))
            {
                Log.Warn("SoX spectrogram generation config variable found (and set to true) but is ignored when running as an IAnalyzer");
            }

            // generate spectrogram
            var configurationDictionary = new Dictionary<string, string>(configuration.ToDictionary());

            configurationDictionary[ConfigKeys.Recording.Key_RecordingCallName] = audioFile.FullName;
            configurationDictionary[ConfigKeys.Recording.Key_RecordingFileName] = audioFile.Name;
            var soxImage = new FileInfo(Path.Combine(segmentSettings.SegmentOutputDirectory.FullName, audioFile.Name + ".SOX.png"));

            var spectrogramResult = Audio2Sonogram.GenerateFourSpectrogramImages(
                audioFile,
                soxImage,
                configurationDictionary,
                dataOnly: analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResult.Events.Length),
                makeSoxSonogram: false);

            // this analysis produces no results!
            // but we still print images (that is the point)
            if (analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResult.Events.Length))
            {
                Debug.Assert(segmentSettings.SegmentImageFile.Exists);
            }

            if (saveCsv)
            {
                var basename           = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
                var spectrogramCsvFile = outputDirectory.CombineFile(basename + ".Spectrogram.csv");
                Csv.WriteMatrixToCsv(spectrogramCsvFile, spectrogramResult.DecibelSpectrogram.Data, TwoDimensionalArray.None);
            }

            return analysisResult;
        }
Example #9
        /// <summary>
        /// METHOD TO CHECK IF Octave FREQ SCALE IS WORKING
        /// Check it on MARINE RECORDING from JASCO, SR=64000.
        /// 24 BIT JASCO RECORDINGS from GBR must be converted to 16 bit.
        /// ffmpeg -i source_file.wav -sample_fmt s16 out_file.wav
        /// e.g. ". C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg\ffmpeg.exe" -i "C:\SensorNetworks\WavFiles\MarineRecordings\JascoGBR\AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56.wav" -sample_fmt s16 "C:\SensorNetworks\Output\OctaveFreqScale\JascoeMarineGBR116bit.wav"
        /// ffmpeg binaries are in C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg
        /// </summary>
        public static void TESTMETHOD_OctaveFrequencyScale2()
        {
            var recordingPath      = @"C:\SensorNetworks\SoftwareTests\TestRecordings\MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit.wav";
            var outputDir          = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo();
            var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo();
            var outputImagePath    = Path.Combine(outputDir.FullName, "JascoMarineGBR1.png");
            var opFileStem         = "JascoMarineGBR1";

            var recording = new AudioRecording(recordingPath);
            var fst       = FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
            var freqScale = new FrequencyScale(fst);

            var sonoConfig = new SonogramConfig
            {
                WindowSize              = freqScale.WindowSize,
                WindowOverlap           = 0.2,
                SourceFName             = recording.BaseName,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

            sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

            // DO NOISE REDUCTION
            var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);

            sonogram.Data = dataMatrix;
            sonogram.Configuration.WindowSize = freqScale.WindowSize;

            var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);

            image.Save(outputImagePath);

            // DO FILE EQUALITY TEST
            string testName         = "test2";
            var    expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyOctaveScaleTest2.EXPECTED.json"));
            var    resultFile       = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyOctaveScaleTest2Results.json"));

            Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations);
            TestTools.FileEqualityTest(testName, resultFile, expectedTestFile);

            LoggedConsole.WriteLine("Completed Octave Frequency Scale " + testName);
            Console.WriteLine("\n\n");
        }
Example #10
        public static Tuple<BaseSonogram, double[,], double[], List<AcousticEvent>, double[], TimeSpan> Execute_ODDetect(
            FileInfo wavPath,
            bool doSegmentation, int minHz, int maxHz, double frameOverlap, double dctDuration, double dctThreshold, int minOscilFreq, int maxOscilFreq,
            double eventThreshold, double minDuration, double maxDuration)
        {
            //i: GET RECORDING
            AudioRecording recording = new AudioRecording(wavPath.FullName);

            //if (recording.SampleRate != 22050) recording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
            int sr = recording.SampleRate;

            //ii: MAKE SONOGRAM
            Log.WriteLine("Start sonogram.");
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.WindowOverlap = frameOverlap;
            sonoConfig.SourceFName   = recording.BaseName;
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            Log.WriteLine("Signal: Duration={0}, Sample Rate={1}", sonogram.Duration, sr);
            Log.WriteLine("Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f0}%, Offset={3:f1}ms, Frames/s={4:f1}",
                          sonogram.Configuration.WindowSize, sonogram.FrameCount, sonogram.FrameDuration * 1000,
                          sonogram.FrameStep * 1000, sonogram.FramesPerSecond, frameOverlap);
            int binCount = (int)(maxHz / sonogram.FBinWidth) - (int)(minHz / sonogram.FBinWidth) + 1;

            Log.WriteIfVerbose("Freq band: {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount);

            Log.WriteIfVerbose("DctDuration=" + dctDuration + "sec.  (# frames=" + (int)Math.Round(dctDuration * sonogram.FramesPerSecond) + ")");
            Log.WriteIfVerbose("Score threshold for oscil events=" + eventThreshold);
            Log.WriteLine("Start OD event detection");

            //iii: DETECT OSCILLATIONS
            bool normaliseDCT = true;
            List<AcousticEvent> predictedEvents; //predefinition of results event list

            double[] scores;                      //predefinition of score array
            double[,] hits;                       //predefinition of hits matrix - to superimpose on sonogram image
            double[] segments;                    //predefinition of segmentation of recording
            TimeSpan analysisTime;                //predefinition of Time duration taken to do analysis on this file

            Oscillations2010.Execute((SpectrogramStandard)sonogram, doSegmentation, minHz, maxHz, dctDuration, dctThreshold, normaliseDCT,
                                     minOscilFreq, maxOscilFreq, eventThreshold, minDuration, maxDuration,
                                     out scores, out predictedEvents, out hits, out segments, out analysisTime);

            return Tuple.Create(sonogram, hits, scores, predictedEvents, segments, analysisTime);
        } // end Execute_ODDetect
Example #11
        /// <summary>
        /// This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="audioRecording">one minute of audio recording.</param>
        /// <param name="config">config file that contains parameters used by all profiles.</param>
        /// <param name="segmentStartOffset">when recording starts.</param>
        /// <param name="getSpectralIndexes">not sure what this is.</param>
        /// <param name="outputDirectory">where the recognizer results can be found.</param>
        /// <param name="imageWidth"> assuming ????.</param>
        /// <returns>recognizer results.</returns>
        public override RecognizerResults Recognize(
            AudioRecording audioRecording,
            Config config,
            TimeSpan segmentStartOffset,
            Lazy<IndexCalculateResult[]> getSpectralIndexes,
            DirectoryInfo outputDirectory,
            int? imageWidth)
        {
            // class KoalaConfig is defined at the bottom of this file.
            var genericConfig = (KoalaConfig)config;
            var recognizer    = new GenericRecognizer();

            RecognizerResults combinedResults = recognizer.Recognize(
                audioRecording,
                genericConfig,
                segmentStartOffset,
                getSpectralIndexes,
                outputDirectory,
                imageWidth);

            // DO POST-PROCESSING of EVENTS

            // Convert events to spectral events for possible combining.
            // Combine overlapping events. If the dB threshold is set low, may get lots of little events.
            var events         = combinedResults.NewEvents;
            var spectralEvents = events.Select(x => (SpectralEvent)x).ToList();
            var newEvents      = CompositeEvent.CombineOverlappingEvents(spectralEvents.Cast<EventCommon>().ToList());

            if (genericConfig.CombinePossibleSyllableSequence)
            {
                // convert events to spectral events for possible combining.
                //var spectralEvents = events.Select(x => (SpectralEvent)x).ToList();
                spectralEvents = newEvents.Cast<SpectralEvent>().ToList();
                var startDiff = genericConfig.SyllableStartDifference;
                var hertzDiff = genericConfig.SyllableHertzGap;
                newEvents = CompositeEvent.CombineSimilarProximalEvents(spectralEvents, TimeSpan.FromSeconds(startDiff), (int)hertzDiff);
            }

            combinedResults.NewEvents = newEvents;

            //UNCOMMENT following line if you want special debug spectrogram, i.e. with special plots.
            //  NOTE: Standard spectrograms are produced by setting SaveSonogramImages: "True" or "WhenEventsDetected" in <Towsey.PteropusSpecies.yml> config file.
            //GenericRecognizer.SaveDebugSpectrogram(territorialResults, genericConfig, outputDirectory, audioRecording.BaseName);
            return combinedResults;
        }
Example #12
        public void Setup()
        {
            this.outputDirectory = PathHelper.GetTempDir();
            this.recording       = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));

            // specified linear scale
            this.freqScale = new FrequencyScale(nyquist: 11025, frameSize: 1024, hertzGridInterval: 1000);

            // set up the config for each spectrogram
            this.sonoConfig = new SonogramConfig
            {
                WindowSize              = this.freqScale.FinalBinCount * 2,
                WindowOverlap           = 0.2,
                SourceFName             = this.recording.BaseName,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };
        }
Example #13
        public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
        {
            var audioFile           = segmentSettings.SegmentAudioFile;
            var recording           = new AudioRecording(audioFile.FullName);
            var sourceRecordingName = recording.BaseName;

            // TODO get the start and end-time offsets for accurate labeling of the time scale.
            //if (arguments.StartOffset.HasValue ^ arguments.EndOffset.HasValue)
            //{
            //    throw new InvalidStartOrEndException("If StartOffset or EndOffset is specified, then both must be specified");
            //}
            // set default offsets - only use defaults if not provided in arguments list
            // var offsetsProvided = arguments.StartOffset.HasValue && arguments.EndOffset.HasValue;
            //TimeSpan? startOffset;
            //TimeSpan? endOffset;
            //if (offsetsProvided)
            //{
            //    startOffset = TimeSpan.FromSeconds(arguments.StartOffset.Value);
            //    endOffset = TimeSpan.FromSeconds(arguments.EndOffset.Value);
            //}

            //var outputDirectory = segmentSettings.SegmentOutputDirectory;
            //bool saveCsv = analysisSettings.AnalysisDataSaveBehavior;

            var analysisResult    = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration);
            var configInfo        = ConfigFile.Deserialize<AnalyzerConfig>(analysisSettings.ConfigFile);
            var spectrogramResult = Audio2Sonogram.GenerateSpectrogramImages(audioFile, configInfo, sourceRecordingName);

            // this analysis produces no results! But we still print images (that is the point)
            // if (analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResult.Events.Length))
            // {
            //     Debug.Assert(condition: segmentSettings.SegmentImageFile.Exists, "Warning: Image file must exist.");
            spectrogramResult.CompositeImage.Save(segmentSettings.SegmentImageFile.FullName, ImageFormat.Png);
            // }

            //if (saveCsv)
            //{
            //    var basename = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
            //    var spectrogramCsvFile = outputDirectory.CombineFile(basename + ".Spectrogram.csv");
            //    Csv.WriteMatrixToCsv(spectrogramCsvFile, spectrogramResult.DecibelSpectrogram.Data, TwoDimensionalArray.None);
            //}

            return analysisResult;
        }
Example #14
        /// <summary>
        /// Initializes a new instance of the <see cref="AmplitudeSpectrogram"/> class.
        /// </summary>
        public AmplitudeSpectrogram(SpectrogramSettings config, WavReader wav)
        {
            this.Configuration = config;
            this.Attributes    = new SpectrogramAttributes();

            double minDuration = 1.0;

            if (wav.Time.TotalSeconds < minDuration)
            {
                LoggedConsole.WriteLine("Signal must at least {0} seconds long to produce a sonogram!", minDuration);
                return;
            }

            //set attributes for the current recording and spectrogram type
            this.Attributes.SampleRate       = wav.SampleRate;
            this.Attributes.Duration         = wav.Time;
            this.Attributes.NyquistFrequency = wav.SampleRate / 2;
            this.Attributes.MaxAmplitude     = wav.CalculateMaximumAmplitude();
            this.Attributes.FrameDuration    = TimeSpan.FromSeconds(this.Configuration.WindowSize / (double)wav.SampleRate);

            var recording = new AudioRecording(wav);
            var fftdata   = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                config.WindowSize,
                config.WindowOverlap,
                this.Configuration.WindowFunction);

            // now recover required data
            //epsilon is a signal dependent minimum amplitude value to prevent possible subsequent log of zero value.
            this.Attributes.Epsilon     = fftdata.Epsilon;
            this.Attributes.WindowPower = fftdata.WindowPower;
            this.Attributes.FrameCount  = fftdata.FrameCount;
            this.Data = fftdata.AmplitudeSpectrogram;

            // IF REQUIRED CONVERT TO MEL SCALE
            if (this.Configuration.DoMelScale)
            {
                // this mel scale conversion uses the "Greg integral" !
                this.Data = MFCCStuff.MelFilterBank(this.Data, this.Configuration.MelBinCount, this.Attributes.NyquistFrequency, 0, this.Attributes.NyquistFrequency);
            }
        }
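
A hedged construction sketch for this class. The WavReader(samples, channels, bitsPerSample, sampleRate) overload appears in GetSubsegmentsSamples further down this page; treating the SpectrogramSettings members the constructor reads (WindowSize, WindowOverlap, DoMelScale) as settable properties is an assumption.

        // Sketch only: one second of silence at 22050 Hz, framed at 512 samples.
        var samples = new double[22050];
        var wav     = new Acoustics.Tools.Wav.WavReader(samples, 1, 16, 22050);

        var settings = new SpectrogramSettings
        {
            WindowSize    = 512,
            WindowOverlap = 0.0,
            DoMelScale    = false, // skip the Mel filter bank branch
        };

        var spectrogram = new AmplitudeSpectrogram(settings, wav);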
Example #15
        public void PcaWhiteningDefault()
        {
            var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");

            var fst       = FreqScaleType.Linear;
            var freqScale = new FrequencyScale(fst);
            var recording = new AudioRecording(recordingPath);

            var sonoConfig = new SonogramConfig
            {
                WindowSize              = freqScale.FinalBinCount * 2,
                WindowOverlap           = 0.2,
                SourceFName             = recording.BaseName,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            // GENERATE AMPLITUDE SPECTROGRAM
            var spectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

            spectrogram.Configuration.WindowSize = freqScale.WindowSize;

            // DO RMS NORMALIZATION
            spectrogram.Data = SNR.RmsNormalization(spectrogram.Data);

            // CONVERT NORMALIZED AMPLITUDE SPECTROGRAM TO dB SPECTROGRAM
            var sonogram = new SpectrogramStandard(spectrogram);

            // DO NOISE REDUCTION
            var dataMatrix = PcaWhitening.NoiseReduction(sonogram.Data);

            sonogram.Data = dataMatrix;

            // DO PCA WHITENING
            var whitenedSpectrogram = PcaWhitening.Whitening(sonogram.Data);

            // DO UNIT TESTING
            // check if the dimensions of the reverted spectrogram (second output of the pca whitening) is equal to the input matrix
            Assert.AreEqual(whitenedSpectrogram.Reversion.GetLength(0), sonogram.Data.GetLength(0));
            Assert.AreEqual(whitenedSpectrogram.Reversion.GetLength(1), sonogram.Data.GetLength(1));
        }
Example #16
        public void TestStandardNoiseRemoval()
        {
            var recording  = new AudioRecording(PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"));
            int windowSize = 512;
            var sr         = recording.SampleRate;

            // window overlap is used only for sonograms. It is not used when calculating acoustic indices.
            double windowOverlap  = 0.0;
            var    windowFunction = WindowFunctions.HAMMING.ToString();

            var fftdata = DSP_Frames.ExtractEnvelopeAndFfts(
                recording,
                windowSize,
                windowOverlap,
                windowFunction);

            // Now recover the data
            // The following data is required when constructing sonograms
            //var duration = recording.WavReader.Time;
            //var frameCount = fftdata.FrameCount;
            //var fractionOfHighEnergyFrames = fftdata.FractionOfHighEnergyFrames;

            double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(fftdata.AmplitudeSpectrogram, fftdata.WindowPower, sr, fftdata.Epsilon);

            // The following call to NoiseProfile.CalculateBackgroundNoise(double[,] spectrogram)
            // returns a noise profile that is used as the BGN spectral index.
            // It calculates the modal background noise for each frequency bin and then returns a smoothed version.
            // By default, the number of SDs = 0 and the smoothing window = 7.
            // The method assumes that the passed spectrogram is oriented as: rows=frames, cols=freq bins.

            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(deciBelSpectrogram);

            var resourcesDir         = PathHelper.ResolveAssetPath("Indices");
            var expectedSpectrumFile = new FileInfo(resourcesDir + "\\NoiseProfile.bin");

            //Binary.Serialize(expectedSpectrumFile, spectralDecibelBgn);
            var expectedVector = Binary.Deserialize<double[]>(expectedSpectrumFile);

            CollectionAssert.That.AreEqual(expectedVector, spectralDecibelBgn, 0.000_000_001);
        }
Example #17
        /// <summary>
        /// Calculates spectral peak tracks: the spectral indices SPT, RHZ, RVT, RPS, RNG.
        /// This method is only called from IndexCalculate.Analysis() when the IndexCalculationDuration is less than 10 seconds,
        /// because the background noise etc. must then be recalculated.
        /// Otherwise the constructor of this class is called: sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);
        /// NOTE: a noise-reduced decibel spectrogram is required.
        /// FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth.
        /// </summary>
        public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording recording, int sampleStart, int sampleEnd, int frameSize, bool octaveScale, double peakThreshold)
        {
            double epsilon          = recording.Epsilon;
            int    sampleRate       = recording.WavReader.SampleRate;
            int    bufferFrameCount = 2; // 2 because must allow for edge effects when using 5x5 grid to find ridges.
            int    ridgeBuffer      = frameSize * bufferFrameCount;
            var    ridgeRecording   = AudioRecording.GetRecordingSubsegment(recording, sampleStart, sampleEnd, ridgeBuffer);
            int    frameStep        = frameSize;
            var    dspOutput        = DSP_Frames.ExtractEnvelopeAndFfts(ridgeRecording, frameSize, frameStep);

            // Generate the ridge SUBSEGMENT decibel spectrogram from the SUBSEGMENT amplitude spectrogram.
            double[,] decibelSpectrogram;
            if (octaveScale)
            {
                var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000);
                decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale);
            }
            else
            {
                decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);
            }

            // calculate the noise profile
            var spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);

            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            double nhDecibelThreshold = 2.0; // SPECTRAL dB THRESHOLD for smoothing background

            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhDecibelThreshold);

            // thresholds in decibels
            // double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            // TimeSpan frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            var sptInfo = new SpectralPeakTracks(decibelSpectrogram, peakThreshold);

            return sptInfo;
        }
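
One way the method above might be invoked; the recording path, sample bounds, frame size, and threshold below are illustrative assumptions, not values from the source.

        // Analyse the first second of a 22050 Hz recording on the linear
        // frequency scale, with a 2 dB spectral peak threshold (all assumed).
        var recording = new AudioRecording(@"C:\recordings\example.wav");
        var sptInfo   = CalculateSpectralPeakTracks(
            recording,
            sampleStart: 0,
            sampleEnd: 22050,
            frameSize: 512,
            octaveScale: false,
            peakThreshold: 2.0);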
Example #18
        public static Image<Rgb24> GetCepstralSpectrogram(
            SonogramConfig sonoConfig,
            AudioRecording recording,
            string sourceRecordingName)
        {
            // TODO at present noise reduction type must be set = Standard.
            sonoConfig.NoiseReductionType      = NoiseReductionType.Standard;
            sonoConfig.NoiseReductionParameter = 3.0;
            var cepgram  = new SpectrogramCepstral(sonoConfig, recording.WavReader);
            var image    = cepgram.GetImage();
            var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
                "CEPSTRO-GRAM " + sourceRecordingName,
                image.Width,
                ImageTags[CepstralSpectrogram]);
            var      startTime          = TimeSpan.Zero;
            var      xAxisTicInterval   = TimeSpan.FromSeconds(1);
            TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(sonoConfig.WindowStep / (double)sonoConfig.SampleRate);
            var      labelInterval      = TimeSpan.FromSeconds(5);

            image = BaseSonogram.FrameSonogram(image, titleBar, startTime, xAxisTicInterval, xAxisPixelDuration, labelInterval);
            return image;
        }
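
A usage sketch for the method above. The paths are hypothetical, and saving the returned ImageSharp Image&lt;Rgb24&gt; by path assumes the usual SixLabors extension method is in scope.

        // Render a cepstral spectrogram and write it to disk (paths assumed).
        var recording  = new AudioRecording(@"C:\recordings\example.wav");
        var sonoConfig = new SonogramConfig { SourceFName = recording.BaseName };

        var image = GetCepstralSpectrogram(sonoConfig, recording, "example.wav");
        image.Save(@"C:\output\example.CepstralSpectrogram.png");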
Example #19
        /// <summary>
        /// Detects segmentation events within the given frequency band of a recording.
        /// </summary>
        /// <param name="wavPath">path to the source audio file.</param>
        /// <param name="minHz">bottom of the search band, in Hertz.</param>
        /// <param name="maxHz">top of the search band, in Hertz.</param>
        /// <param name="frameOverlap">fractional overlap between successive frames.</param>
        /// <param name="smoothWindow">window (in seconds) used to smooth the intensity array.</param>
        /// <param name="thresholdSD">segmentation threshold, in standard deviations.</param>
        /// <param name="minDuration">used for smoothing intensity as well as for removing short events.</param>
        /// <param name="maxDuration">maximum duration of a retained event.</param>
        /// <returns>the sonogram plus the segmentation events and scores.</returns>
        public static Tuple<BaseSonogram, List<AcousticEvent>, double, double, double, double[]> Execute_Segmentation(
            FileInfo wavPath,
            int minHz, int maxHz, double frameOverlap, double smoothWindow, double thresholdSD, double minDuration, double maxDuration)
        {
            //i: GET RECORDING
            AudioRecording recording = new AudioRecording(wavPath.FullName);

            //ii: MAKE SONOGRAM
            Log.WriteLine("# Start sonogram.");
            SonogramConfig sonoConfig = new SonogramConfig(); //default values config

            sonoConfig.WindowOverlap = frameOverlap;
            sonoConfig.SourceFName   = recording.BaseName;
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            //iii: DETECT SEGMENTS
            Log.WriteLine("# Start event detection");
            var tuple = AcousticEvent.GetSegmentationEvents((SpectrogramStandard)sonogram, TimeSpan.Zero, minHz, maxHz, smoothWindow,
                                                            thresholdSD, minDuration, maxDuration);
            var tuple2 = Tuple.Create(sonogram, tuple.Item1, tuple.Item2, tuple.Item3, tuple.Item4, tuple.Item5);

            return tuple2;
        } // end Execute_Segmentation
Example #20
        /// <summary>
        /// Generate a Spectrogram.
        /// </summary>
        /// <param name="bytes">
        /// The bytes.
        /// </param>
        /// <returns>
        /// Spectrogram image.
        /// </returns>
        /// <exception cref="NotSupportedException"><c>NotSupportedException</c>.</exception>
        public Bitmap Spectrogram(byte[] bytes)
        {
            /*
             * 80 pixels per second is too quick for Silverlight.
             * we want to use 40 pixels per second (half - window size of 0)
             */

            var sonogramConfig = new SonogramConfig
            {
                WindowOverlap = 0,     // was 0.5 when ppms was 0.08
                WindowSize    = 512,
                DoSnr         = false, // might save us some time generating spectrograms.
            };

            Bitmap image;

            using (var audiorecording = new AudioRecording(bytes))
            {
                // audiorecording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
                if (audiorecording.SampleRate != 22050)
                {
                    var msg = string.Format(
                        "Must be able to convert audio to 22050hz. Audio has sample rate of {0}.",
                        audiorecording.SampleRate);

                    throw new NotSupportedException(msg);
                }

                using (var sonogram = new SpectrogramStandard(sonogramConfig, audiorecording.WavReader))
                    using (var img = sonogram.GetImage())
                    {
                        image = new Bitmap(img);
                    }
            }

            return image;
        }
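
As a rough check on the pixels-per-second comment at the top of this method: with one image column per frame, a 22050 Hz signal framed at WindowSize 512 and zero overlap gives 22050 / 512 ≈ 43 columns per second, close to the 40 px/s target, while a 0.5 overlap halves the frame step to 256 samples and roughly doubles the rate to ≈ 86 px/s, in the region of the 80 px/s the comment mentions.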
Example #21
        private Lazy<IndexCalculateResult[]> GetLazyIndices<T>(
            AudioRecording recording,
            AnalysisSettings analysisSettings,
            SegmentSettings<T> segmentSettings,
            AcousticIndices.AcousticIndicesConfig acousticConfiguration)
        {
            // Convert the Config config to IndexCalculateConfig class and merge in the unnecessary parameters.

            IndexCalculateResult[] Callback()
            {
                IndexCalculateResult[] subsegmentResults = AcousticIndices.CalculateIndicesInSubsegments(
                    recording,
                    segmentSettings.SegmentStartOffset,
                    segmentSettings.AnalysisIdealSegmentDuration,
                    acousticConfiguration.IndexCalculationDuration.Seconds(),
                    acousticConfiguration.IndexProperties,
                    segmentSettings.Segment.SourceMetadata.SampleRate,
                    acousticConfiguration);

                return subsegmentResults;
            }

            return new Lazy<IndexCalculateResult[]>(Callback, LazyThreadSafetyMode.ExecutionAndPublication);
        }
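
The point of the Lazy&lt;IndexCalculateResult[]&gt; wrapper is deferral: the index calculation only runs if a caller actually reads .Value. A minimal BCL-only illustration of the same pattern (the array factory below is a stand-in for the expensive callback):

        // Nothing runs at construction time...
        var lazy = new Lazy<int[]>(
            () => new int[1024], // stand-in for the expensive index calculation
            LazyThreadSafetyMode.ExecutionAndPublication);

        // ...the factory executes exactly once, on first access,
        // even if several threads race to read Value.
        int[] results = lazy.Value;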
Example #22
        /// <summary>
        /// Cuts audio into subsegments of the desired length.
        /// Returns the list of subsegments.
        /// </summary>
        public static List<AudioRecording> GetSubsegmentsSamples(AudioRecording recording, double subsegmentDurationInSeconds, double frameStep)
        {
            List<AudioRecording> subsegments = new List<AudioRecording>();

            int    sampleRate            = recording.WavReader.SampleRate;
            var    segmentDuration       = recording.WavReader.Time.TotalSeconds;
            int    segmentSampleCount    = (int)(segmentDuration * sampleRate);
            int    subsegmentSampleCount = (int)(subsegmentDurationInSeconds * sampleRate);
            double subsegmentFrameCount  = subsegmentSampleCount / (double)frameStep;

            subsegmentFrameCount  = (int)subsegmentFrameCount;
            subsegmentSampleCount = ((int)(subsegmentFrameCount * frameStep) < subsegmentSampleCount) ? subsegmentSampleCount : (int)(subsegmentFrameCount * frameStep);

            for (int i = 0; i < (int)(segmentSampleCount / subsegmentSampleCount); i++)
            {
                double[] subsamples          = DataTools.Subarray(recording.WavReader.Samples, i * subsegmentSampleCount, subsegmentSampleCount);
                var      wr                  = new Acoustics.Tools.Wav.WavReader(subsamples, 1, 16, sampleRate);
                var      subsegmentRecording = new AudioRecording(wr);
                subsegments.Add(subsegmentRecording);
            }

            return subsegments;
        }
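
A hedged usage sketch for the helper above; the path, subsegment duration, and frame step are illustrative assumptions.

        // Split a one-minute segment into ten-second subsegments.
        var recording   = new AudioRecording(@"C:\recordings\one-minute-segment.wav");
        var subsegments = GetSubsegmentsSamples(recording, subsegmentDurationInSeconds: 10.0, frameStep: 512);

        foreach (var subsegment in subsegments)
        {
            Console.WriteLine(subsegment.WavReader.Time.TotalSeconds);
        }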
Example #23
        private void OnRecord(object sender, RoutedEventArgs e)
        {
            this.samples     = new List<float>();
            this.isRecording = true;
            string filename = GetNextWavFileName();

            try
            {
                this.recording = this.selectedDevice.StartRecording(filename, (s, sample) =>
                {
                    // handle PCM data on background thread
                    //
                    UiDispatcher.RunOnUIThread(new Action(() =>
                    {
                        if (this.isRecording)
                        {
                            CollectSamples(sample);
                        }
                        if (sample.Error != null)
                        {
                            ShowStatus(sample.Error);
                        }
                        else if (!sample.Closed)
                        {
                            // keep pumping no matter what so we don't get any buffering on next recording.
                            this.recording.ReadNextFrame();
                        }
                    }));
                });
            }
            catch (Exception ex)
            {
                ShowStatus(ex.Message);
            }
            SetButtonState();
        }
Example #24
        /// <summary>
        /// Starts voice control.
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private async void RecordAndAnalyze(object sender, EventArgs e)
        {
            try
            {
                MainGrid.IsEnabled = false;
                Vibration.Vibrate();
                BottomPanel.BackgroundColor = Color.FromHex("#C00000");

                if (await AudioRecording.CheckAudioPermissions())
                {
                    await AudioRecording.RecordAudio();
                    await AnalizeCommandHandwritten();
                }
            }
            catch (Exception)
            {
                await Navigation.PushAsync(new SomethingWentWrongPage());
            }
            finally
            {
                MainGrid.IsEnabled          = true;
                BottomPanel.BackgroundColor = Color.Black;
            }
        }
Example #25
        public AnalysisResult2 Analyze<T>(AnalysisSettings analysisSettings, SegmentSettings<T> segmentSettings)
        {
            var acousticIndicesConfiguration = (AcousticIndicesConfig)analysisSettings.AnalysisAnalyzerSpecificConfiguration;
            var indexCalculationDuration     = acousticIndicesConfiguration.IndexCalculationDuration.Seconds();

            var audioFile       = segmentSettings.SegmentAudioFile;
            var recording       = new AudioRecording(audioFile.FullName);
            var outputDirectory = segmentSettings.SegmentOutputDirectory;

            var analysisResults = new AnalysisResult2(analysisSettings, segmentSettings, recording.Duration);

            analysisResults.AnalysisIdentifier = this.Identifier;

            // calculate indices for each subsegment
            IndexCalculateResult[] subsegmentResults = CalculateIndicesInSubsegments(
                recording,
                segmentSettings.SegmentStartOffset,
                segmentSettings.AnalysisIdealSegmentDuration,
                indexCalculationDuration,
                acousticIndicesConfiguration.IndexProperties,
                segmentSettings.Segment.SourceMetadata.SampleRate,
                acousticIndicesConfiguration);

            var trackScores = new List<Plot>(subsegmentResults.Length);
            var tracks      = new List<Track>(subsegmentResults.Length);

            analysisResults.SummaryIndices  = new SummaryIndexBase[subsegmentResults.Length];
            analysisResults.SpectralIndices = new SpectralIndexBase[subsegmentResults.Length];
            for (int i = 0; i < subsegmentResults.Length; i++)
            {
                var indexCalculateResult = subsegmentResults[i];
                indexCalculateResult.SummaryIndexValues.FileName  = segmentSettings.Segment.SourceMetadata.Identifier;
                indexCalculateResult.SpectralIndexValues.FileName = segmentSettings.Segment.SourceMetadata.Identifier;

                analysisResults.SummaryIndices[i]  = indexCalculateResult.SummaryIndexValues;
                analysisResults.SpectralIndices[i] = indexCalculateResult.SpectralIndexValues;
                trackScores.AddRange(indexCalculateResult.TrackScores);
                if (indexCalculateResult.Tracks != null)
                {
                    tracks.AddRange(indexCalculateResult.Tracks);
                }
            }

            if (analysisSettings.AnalysisDataSaveBehavior)
            {
                this.WriteSummaryIndicesFile(segmentSettings.SegmentSummaryIndicesFile, analysisResults.SummaryIndices);
                analysisResults.SummaryIndicesFile = segmentSettings.SegmentSummaryIndicesFile;
            }

            if (analysisSettings.AnalysisDataSaveBehavior)
            {
                analysisResults.SpectraIndicesFiles =
                    WriteSpectrumIndicesFilesCustom(
                        segmentSettings.SegmentSpectrumIndicesDirectory,
                        Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name),
                        analysisResults.SpectralIndices);
            }

            // write the segment spectrogram (typically of one minute duration) to CSV
            // this is required if you want to produce zoomed spectrograms at a resolution greater than 0.2 seconds/pixel
            bool saveSonogramData = analysisSettings.Configuration.GetBoolOrNull(AnalysisKeys.SaveSonogramData) ?? false;

            if (saveSonogramData || analysisSettings.AnalysisImageSaveBehavior.ShouldSave(analysisResults.Events.Length))
            {
                var sonoConfig = new SonogramConfig(); // default values config
                sonoConfig.SourceFName   = recording.FilePath;
                sonoConfig.WindowSize    = acousticIndicesConfiguration.FrameLength;
                sonoConfig.WindowStep    = analysisSettings.Configuration.GetIntOrNull(AnalysisKeys.FrameStep) ?? sonoConfig.WindowSize; // default = no overlap
                sonoConfig.WindowOverlap = (sonoConfig.WindowSize - sonoConfig.WindowStep) / (double)sonoConfig.WindowSize;

                // Linear or Octave frequency scale?
                bool octaveScale = analysisSettings.Configuration.GetBoolOrNull(AnalysisKeys.KeyOctaveFreqScale) ?? false;
                if (octaveScale)
                {
                    sonoConfig.WindowStep    = sonoConfig.WindowSize;
                    sonoConfig.WindowOverlap = (sonoConfig.WindowSize - sonoConfig.WindowStep) / (double)sonoConfig.WindowSize;
                }

                ////sonoConfig.NoiseReductionType = NoiseReductionType.NONE; // the default
                ////sonoConfig.NoiseReductionType = NoiseReductionType.STANDARD;
                var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

                // remove the DC row of the spectrogram
                sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

                if (analysisSettings.AnalysisImageSaveBehavior.ShouldSave())
                {
                    string imagePath = Path.Combine(outputDirectory.FullName, segmentSettings.SegmentImageFile.Name);

                    // NOTE: hits (SPT in this case) is intentionally not supported
                    var image = DrawSonogram(sonogram, null, trackScores, tracks);
                    image.Save(imagePath);
                    analysisResults.ImageFile = new FileInfo(imagePath);
                }

                if (saveSonogramData)
                {
                    string csvPath = Path.Combine(outputDirectory.FullName, recording.BaseName + ".csv");
                    Csv.WriteMatrixToCsv(csvPath.ToFileInfo(), sonogram.Data);
                }
            }

            return analysisResults;
        }
Example #26
        public static IndexCalculateResult[] CalculateIndicesInSubsegments(
            AudioRecording recording,
            TimeSpan segmentStartOffset,
            TimeSpan segmentDuration,
            TimeSpan indexCalculationDuration,
            Dictionary<string, IndexProperties> indexProperties,
            int sampleRateOfOriginalAudioFile,
            IndexCalculateConfig config)
        {
            if (recording.WavReader.Channels > 1)
            {
                throw new InvalidOperationException(
                          @"A multi-channel recording MUST be mixed down to MONO before calculating acoustic indices!");
            }

            double recordingDuration  = recording.Duration.TotalSeconds;
            double subsegmentDuration = indexCalculationDuration.TotalSeconds;

            // intentional possible null ref, throw if null
            double segmentDurationSeconds = segmentDuration.TotalSeconds;
            double audioCuttingError      = subsegmentDuration - segmentDurationSeconds;

            // using the expected duration, each call to analyze will always produce the same number of results
            // round, we expect perfect numbers, warn if not
            double       subsegmentsInSegment = segmentDurationSeconds / subsegmentDuration;
            int          subsegmentCount      = (int)Math.Round(segmentDurationSeconds / subsegmentDuration);
            const double warningThreshold     = 0.01; // 1%
            double       fraction             = subsegmentsInSegment - subsegmentCount;

            if (Math.Abs(fraction) > warningThreshold)
            {
                Log.Warn(
                    string.Format(
                        "The IndexCalculationDuration ({0}) does not fit well into the provided segment ({1}). This means a partial result has been {3}, {2} results will be calculated",
                        subsegmentDuration,
                        segmentDurationSeconds,
                        subsegmentCount,
                        fraction >= 0.5 ? "added" : "removed"));
            }

            Log.Trace(subsegmentCount + " subsegments will be calculated");

            var indexCalculateResults = new IndexCalculateResult[subsegmentCount];

            // calculate indices for each subsegment
            for (int i = 0; i < subsegmentCount; i++)
            {
                var subsegmentOffset     = segmentStartOffset + TimeSpan.FromSeconds(i * subsegmentDuration);
                var indexCalculateResult = IndexCalculate.Analysis(
                    recording,
                    subsegmentOffset,
                    indexProperties,
                    sampleRateOfOriginalAudioFile,
                    segmentStartOffset,
                    config);

                indexCalculateResults[i] = indexCalculateResult;
            }

            return indexCalculateResults;
        }
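
To make the rounding concrete: a 60-second segment with IndexCalculationDuration = 0.2 s divides exactly into 300 subsegments and no warning fires. With a 7-second duration, 60 / 7 ≈ 8.571 rounds to 9 subsegments, leaving a fraction of −0.429; that exceeds the 1% tolerance, so the method warns that a partial result has been added and still returns 9 results.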
Example #27
        /// <summary>
        /// THE KEY ANALYSIS METHOD.
        /// </summary>
        /// <param name="dictionaryOfHiResSpectralIndices">the high-resolution spectral indices, keyed by index name.</param>
        /// <param name="recording">the segment of the source file.</param>
        /// <param name="configDict">the config dictionary.</param>
        /// <param name="analysisSettings">the analysis settings.</param>
        /// <param name="segmentSettings">the segment settings.</param>
        /// <returns>
        /// The <see cref="LimnodynastesConvexResults"/>.
        /// </returns>
        internal static LimnodynastesConvexResults Analysis(
            Dictionary<string, double[,]> dictionaryOfHiResSpectralIndices,
            AudioRecording recording,
            Dictionary<string, string> configDict,
            AnalysisSettings analysisSettings,
            SegmentSettingsBase segmentSettings)
        {
            // for Limnodynastes convex, in the D. Stewart CD, there are peaks close to:
            //1. 1950 Hz
            //2. 1460 Hz
            //3.  970 Hz    These are 490 Hz apart.
            // for Limnodynastes convex, in the JCU recording, there are peaks close to:
            //1. 1780 Hz
            //2. 1330 Hz
            //3.  880 Hz    These are 450 Hz apart.

            // So strategy is to look for three peaks separated by same amount and in the vicinity of the above,
            //  starting with highest power (the top peak) and working down to lowest power (bottom peak).

            var      outputDir          = segmentSettings.SegmentOutputDirectory;
            TimeSpan segmentStartOffset = segmentSettings.SegmentStartOffset;

            //KeyValuePair<string, double[,]> kvp = dictionaryOfHiResSpectralIndices.First();
            var spg         = dictionaryOfHiResSpectralIndices["RHZ"];
            int rhzRowCount = spg.GetLength(0);
            int rhzColCount = spg.GetLength(1);

            int    sampleRate        = recording.SampleRate;
            double herzPerBin        = sampleRate / 2 / (double)rhzRowCount;
            double scoreThreshold    = configDict.ContainsKey("EventThreshold") ? double.Parse(configDict["EventThreshold"]) : 3.0;
            int    minimumFrequency  = configDict.ContainsKey("MinHz") ? int.Parse(configDict["MinHz"]) : 850;
            int    dominantFrequency = configDict.ContainsKey("DominantFrequency") ? int.Parse(configDict["DominantFrequency"]) : 1850;

            // # The Limnodynastes call has three major peaks. The dominant peak is at 1850 or as set above.
            // # The second and third peak are at equal gaps below. DominantFreq-gap and DominantFreq-(2*gap);
            // # Set the gap in the Config file. Should typically be in range 880 to 970
            int peakGapInHerz = configDict.ContainsKey("PeakGap") ? int.Parse(configDict["PeakGap"]) : 470;
            int F1AndF2Gap    = (int)Math.Round(peakGapInHerz / herzPerBin);
            //int F1AndF2Gap = 10; // 10 = number of freq bins
            int F1AndF3Gap = 2 * F1AndF2Gap;
            //int F1AndF3Gap = 20;

            int hzBuffer       = 250;
            int bottomBin      = 5;
            int dominantBin    = (int)Math.Round(dominantFrequency / herzPerBin);
            int binBuffer      = (int)Math.Round(hzBuffer / herzPerBin);
            int dominantBinMin = dominantBin - binBuffer;
            int dominantBinMax = dominantBin + binBuffer;

            //  freqBin + rowID = binCount - 1;
            // therefore: rowID = binCount - freqBin - 1;
            int minRowID  = rhzRowCount - dominantBinMax - 1;
            int maxRowID  = rhzRowCount - dominantBinMin - 1;
            int bottomRow = rhzRowCount - bottomBin - 1;

            var list = new List<Point>();

            // loop through all spectra/columns of the hi-res spectrogram.
            for (int c = 1; c < rhzColCount - 1; c++)
            {
                double maxAmplitude            = -double.MaxValue;
                int    idOfRowWithMaxAmplitude = 0;

                for (int r = minRowID; r <= bottomRow; r++)
                {
                    if (spg[r, c] > maxAmplitude)
                    {
                        maxAmplitude            = spg[r, c];
                        idOfRowWithMaxAmplitude = r;
                    }
                }

                if (idOfRowWithMaxAmplitude < minRowID)
                {
                    continue;
                }
                if (idOfRowWithMaxAmplitude > maxRowID)
                {
                    continue;
                }

                // want a spectral peak.
                if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c - 1])
                {
                    continue;
                }
                if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c + 1])
                {
                    continue;
                }
                // peak should exceed threshold amplitude (hard-coded at 3.0 dB; cf. the configurable scoreThreshold above)
                if (spg[idOfRowWithMaxAmplitude, c] < 3.0)
                {
                    continue;
                }

                // convert row ID to freq bin ID
                int freqBinID = rhzRowCount - idOfRowWithMaxAmplitude - 1;
                list.Add(new Point(c, freqBinID));

                // Console.WriteLine("Col {0}, Bin {1}  ", c, freqBinID);
            }

            // We now have a list of potential hits for LimCon. It needs to be filtered.

            // DEBUG ONLY // ################################ TEMPORARY ################################
            // superimpose point on RHZ HiRes spectrogram for debug purposes
            bool drawOnHiResSpectrogram = true;
            var    fileName   = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
            string filePath   = Path.Combine(outputDir.FullName, "SpectrogramImages", fileName + ".CombinedGreyScale.png");
            var    debugImage = new FileInfo(filePath);

            if (!debugImage.Exists)
            {
                drawOnHiResSpectrogram = false;
            }
            if (drawOnHiResSpectrogram)
            {
                // put a red dot where each max is
                // (the hard-coded offsets 70 and 1911 locate the spectrogram origin within the CombinedGreyScale image)
                Bitmap bmp = new Bitmap(filePath);
                foreach (Point point in list)
                {
                    bmp.SetPixel(point.X + 70, 1911 - point.Y, Color.Red);
                }
                // mark off every tenth frequency bin
                for (int r = 0; r < 26; r++)
                {
                    bmp.SetPixel(68, 1911 - (r * 10), Color.Blue);
                    bmp.SetPixel(69, 1911 - (r * 10), Color.Blue);
                }
                // mark off upper bound and lower frequency bound
                bmp.SetPixel(69, 1911 - dominantBinMin, Color.Lime);
                bmp.SetPixel(69, 1911 - dominantBinMax, Color.Lime);
                //bmp.SetPixel(69, 1911 - maxRowID, Color.Lime);
                string opFilePath = Path.Combine(outputDir.FullName, "SpectrogramImages", fileName + ".CombinedGreyScaleAnnotated.png");
                bmp.Save(opFilePath);
            }
            // END DEBUG ################################ TEMPORARY ################################

            // now construct the standard decibel spectrogram WITHOUT noise removal, and look for LimConvex
            // get frame parameters for the analysis
            double epsilon   = Math.Pow(0.5, recording.BitsPerSample - 1);
            int    frameSize = rhzRowCount * 2;
            int    frameStep = frameSize; // this default = zero overlap
            double frameDurationInSeconds = frameSize / (double)sampleRate;
            double frameStepInSeconds     = frameStep / (double)sampleRate;
            double framesPerSec           = 1 / frameStepInSeconds;
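            // e.g., assuming rhzRowCount = 256 and sr = 22050: frameSize = 512 samples,
            // frameDurationInSeconds = 512 / 22050 ≈ 0.0232 s and framesPerSec ≈ 43.07.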
            //var dspOutput = DSP_Frames.ExtractEnvelopeAndFFTs(recording, frameSize, frameStep);
            //// Generate deciBel spectrogram
            //double[,] deciBelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput.amplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon);

            // i: Init SONOGRAM config
            var sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameSize,
                WindowOverlap      = 0.0,
                NoiseReductionType = NoiseReductionType.None,
            };
            // init sonogram
            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // remove the DC row of the spectrogram
            sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);
            //scores.Add(new Plot("Decibels", DataTools.NormaliseMatrixValues(dBArray), ActivityAndCover.DefaultActivityThresholdDb));
            //scores.Add(new Plot("Active Frames", DataTools.Bool2Binary(activity.activeFrames), 0.0));

            // convert spectral peaks to frequency
            //var tuple_DecibelPeaks = SpectrogramTools.HistogramOfSpectralPeaks(deciBelSpectrogram);
            //int[] peaksBins = tuple_DecibelPeaks.Item2;
            //double[] freqPeaks = new double[peaksBins.Length];
            //int binCount = sonogram.Data.GetLength(1);
            //for (int i = 1; i < peaksBins.Length; i++) freqPeaks[i] = (lowerBinBound + peaksBins[i]) / (double)nyquistBin;
            //scores.Add(new Plot("Max Frequency", freqPeaks, 0.0));  // location of peaks for spectral images

            // create new list of LimCon hits in the standard spectrogram.
            double timeSpanOfFrameInSeconds = frameDurationInSeconds; // i.e. frameSize / sampleRate, as computed above
            var    newList     = new List <int[]>();
            int    lastFrameID = sonogram.Data.GetLength(0) - 1;
            int    lastBinID   = sonogram.Data.GetLength(1) - 1;

            foreach (Point point in list)
            {
                double secondsFromStartOfSegment = (point.X * 0.1) + 0.05; // convert point.X (0.1 s time-blocks) to the centre of its time-block.
                int    framesFromStartOfSegment  = (int)Math.Round(secondsFromStartOfSegment / timeSpanOfFrameInSeconds);
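                // e.g., a hit at point.X = 10 maps to (10 * 0.1) + 0.05 = 1.05 s;
                // assuming ~0.0232 s frames (see above), that is frame 45 of the standard spectrogram.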

                // location of max point is uncertain, so search in neighbourhood.
                // NOTE: sonogram.data matrix is time*freqBin
                double maxValue = -double.MaxValue;
                int    idOfTMax = framesFromStartOfSegment;
                int    idOfFMax = point.Y;
                for (int deltaT = -4; deltaT <= 4; deltaT++)
                {
                    for (int deltaF = -1; deltaF <= 1; deltaF++)
                    {
                        int newT = framesFromStartOfSegment + deltaT;
                        if (newT < 0)
                        {
                            newT = 0;
                        }
                        else if (newT > lastFrameID)
                        {
                            newT = lastFrameID;
                        }

                        // clamp the frequency index as well, so points near the matrix edge cannot index out of range
                        int newF = point.Y + deltaF;
                        if (newF < 0)
                        {
                            newF = 0;
                        }
                        else if (newF > lastBinID)
                        {
                            newF = lastBinID;
                        }

                        double value = sonogram.Data[newT, newF];
                        if (value > maxValue)
                        {
                            maxValue = value;

                            // record the clamped indices, not the raw offsets
                            idOfTMax = newT;
                            idOfFMax = newF;
                        }
                    }
                }

                newList.Add(new[] { idOfTMax, idOfFMax });
            }

            // Now examine more of the spectrogram to see if it has peaks at two other places characteristic of Limnodynastes convex.
            // In the D. Stewart CD, there are peaks close to:
            //1. 1950 Hz
            //2. 1460 Hz
            //3.  970 Hz    These are 490 Hz apart.
            // For Limnodynastes convex in the JCU recording, there are peaks close to:
            //1. 1780 Hz
            //2. 1330 Hz
            //3.  880 Hz    These are 450 Hz apart.

            // So strategy is to look for three peaks separated by same amount and in the vicinity of the above,
            //  starting with highest power (the top peak) and working down to lowest power (bottom peak).
            // We have found the top/highest peak - now find the other two.
            int secondDominantFrequency = 1380;
            int secondDominantBin       = (int)Math.Round(secondDominantFrequency / herzPerBin);
            int thirdDominantFrequency  = 900;
            int thirdDominantBin        = (int)Math.Round(thirdDominantFrequency / herzPerBin);

            var acousticEvents = new List <AcousticEvent>();
            int Tbuffer        = 2;

            // First extract a sub-matrix.
            foreach (int[] array in newList)
            {
                // NOTE: sonogram.data matrix is time*freqBin
                int Tframe = array[0];
                int F1bin  = array[1];

                // skip hits too close to the segment edges to extract the surrounding sub-matrices below
                if (Tframe - 3 < 0 || Tframe + 3 > lastFrameID)
                {
                    continue;
                }

                double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - Tbuffer, 0, Tframe + Tbuffer, F1bin);
                double F1power = subMatrix[Tbuffer, F1bin];

                // convert to a spectrum by averaging the sub-matrix columns over time
                var spectrum = MatrixTools.GetColumnAverages(subMatrix);
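                // At this point subMatrix has (2 * Tbuffer) + 1 rows (time frames) and F1bin + 1 columns (freq bins 0..F1bin),
                // so spectrum has F1bin + 1 elements and the peaks[] array below is indexed by freq bin.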

                // use the following to estimate the background noise power in the neighbourhood of the frame
                double[,] powerMatrix = MatrixTools.Submatrix(sonogram.Data, Tframe - 3, 10, Tframe + 3, F1bin);
                double averagePower = MatrixTools.GetRowAverages(powerMatrix).Average();
                double score        = F1power - averagePower; // i.e. an SNR estimate in dB

                // debug - checking what the spectrum looks like.
                //for (int i = 0; i < 18; i++)
                //    spectrum[i] = -100.0;
                //DataTools.writeBarGraph(spectrum);

                // locate the peaks in lower frequency bands, F2 and F3
                bool[] peaks = DataTools.GetPeaks(spectrum);

                int    F2bin   = 0;
                double F2power = -200.0; // dB
                for (int i = -3; i <= 2; i++)
                {
                    int bin = F1bin - F1AndF2Gap + i;
                    if ((peaks[bin]) && (F2power < subMatrix[1, bin]))
                    {
                        F2bin   = bin;
                        F2power = subMatrix[1, bin];
                    }
                }
                if (F2bin == 0)
                {
                    continue;
                }
                if (F2power == -200.0)
                {
                    continue;
                }
                score += (F2power - averagePower);

                int    F3bin   = 0;
                double F3power = -200.0;
                for (int i = -5; i <= 2; i++)
                {
                    int bin = F1bin - F1AndF3Gap + i;
                    if ((peaks[bin]) && (F3power < subMatrix[1, bin]))
                    {
                        F3bin   = bin;
                        F3power = subMatrix[1, bin];
                    }
                }
                if (F3bin == 0)
                {
                    continue;
                }
                if (F3power == -200.0)
                {
                    continue;
                }

                score += F3power - averagePower;
                score /= 3; // the average SNR of the three peaks

                // ignore events where SNR < decibel threshold
                if (score < scoreThreshold)
                {
                    continue;
                }

                // ignore events with wrong power distribution. A good LimnoConvex call has strongest F1 power
                if ((F3power > F1power) || (F2power > F1power))
                {
                    continue;
                }

                // A freq bin ID converts back to a matrix row ID via: rowID = binCount - freqBin - 1.
                // (These row IDs are computed for reference but are not used below.)
                minRowID = rhzRowCount - F1bin - 2;
                maxRowID = rhzRowCount - F3bin - 1;
                int F1RowID = rhzRowCount - F1bin - 1;
                int F2RowID = rhzRowCount - F2bin - 1;
                int F3RowID = rhzRowCount - F3bin - 1;

                int    maxfreq             = dominantFrequency + hzBuffer;
                int    topBin              = (int)Math.Round(maxfreq / herzPerBin);
                int    frameCount          = 4;
                double duration            = frameCount * frameStepInSeconds;
                double startTimeWrtSegment = (Tframe - 2) * frameStepInSeconds;

                // Got to here so start initialising an acoustic event
                var ae = new AcousticEvent(segmentStartOffset, startTimeWrtSegment, duration, minimumFrequency, maxfreq);
                ae.SetTimeAndFreqScales(framesPerSec, herzPerBin);
                //var ae = new AcousticEvent(oblong, recording.Nyquist, binCount, frameDurationInSeconds, frameStepInSeconds, frameCount);
                //ae.StartOffset = TimeSpan.FromSeconds(Tframe * frameStepInSeconds);

                var pointF1 = new Point(2, topBin - F1bin);
                var pointF2 = new Point(2, topBin - F2bin);
                var pointF3 = new Point(2, topBin - F3bin);
                ae.Points = new List <Point>();
                ae.Points.Add(pointF1);
                ae.Points.Add(pointF2);
                ae.Points.Add(pointF3);
                //tried using HitElements but did not do what I wanted later on.
                //ae.HitElements = new HashSet<Point>();
                //ae.HitElements = new SortedSet<Point>();
                //ae.HitElements.Add(pointF1);
                //ae.HitElements.Add(pointF2);
                //ae.HitElements.Add(pointF3);
                ae.Score = score;
                //ae.MinFreq = Math.Round((topBin - F3bin - 5) * herzPerBin);
                //ae.MaxFreq = Math.Round(topBin * herzPerBin);
                acousticEvents.Add(ae);
            }

            // now add in extra common info to the acoustic events
            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = configDict[AnalysisKeys.SpeciesName];
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.SegmentDurationSeconds = recording.Duration.TotalSeconds;
                ae.Name         = abbreviatedName;
                ae.BorderColour = Color.Red;
                ae.FileName     = recording.BaseName;
            });

            double[] scores = new double[rhzColCount];           // predefinition of score array
            double   normalisationConstant = scoreThreshold * 4; // four times the score threshold
            double   compressionFactor     = rhzColCount / (double)sonogram.Data.GetLength(0);
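            // e.g., with the default scoreThreshold of 3.0 dB the constant is 12.0,
            // so a raw score of 6.0 dB normalises to 0.5 and anything >= 12.0 dB saturates at 1.0.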

            foreach (AcousticEvent ae in acousticEvents)
            {
                ae.ScoreNormalised = ae.Score / normalisationConstant;
                if (ae.ScoreNormalised > 1.0)
                {
                    ae.ScoreNormalised = 1.0;
                }
                int frameID      = (int)Math.Round(ae.EventStartSeconds / frameDurationInSeconds);
                int hiresFrameID = (int)Math.Floor(frameID * compressionFactor);
                scores[hiresFrameID] = ae.ScoreNormalised;
            }
            var plot = new Plot(AnalysisName, scores, scoreThreshold);

            // DEBUG ONLY ################################ TEMPORARY ################################
            // Draw a standard spectrogram and mark off hits etc.
            bool createStandardDebugSpectrogram = true;

            var imageDir = new DirectoryInfo(Path.Combine(outputDir.FullName, "SpectrogramImages"));

            if (!imageDir.Exists)
            {
                imageDir.Create();
            }
            if (createStandardDebugSpectrogram)
            {
                var    fileName2 = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
                string filePath2 = Path.Combine(imageDir.FullName, fileName2 + ".Spectrogram.png");
                Bitmap sonoBmp   = (Bitmap)sonogram.GetImage();
                int    height    = sonoBmp.Height;
                foreach (AcousticEvent ae in acousticEvents)
                {
                    ae.DrawEvent(sonoBmp);
                    //g.DrawRectangle(pen, ob.ColumnLeft, ob.RowTop, ob.ColWidth-1, ob.RowWidth);
                    //ae.DrawPoint(sonoBmp, ae.HitElements.[0], Color.OrangeRed);
                    //ae.DrawPoint(sonoBmp, ae.HitElements[1], Color.Yellow);
                    //ae.DrawPoint(sonoBmp, ae.HitElements[2], Color.Green);
                    ae.DrawPoint(sonoBmp, ae.Points[0], Color.OrangeRed);
                    ae.DrawPoint(sonoBmp, ae.Points[1], Color.Yellow);
                    ae.DrawPoint(sonoBmp, ae.Points[2], Color.LimeGreen);
                }

                // draw the original hits on the standard sonogram
                foreach (int[] array in newList)
                {
                    sonoBmp.SetPixel(array[0], height - array[1], Color.Cyan);
                }

                // mark off every tenth frequency bin on the standard sonogram
                for (int r = 0; r < 20; r++)
                {
                    sonoBmp.SetPixel(0, height - (r * 10) - 1, Color.Blue);
                    sonoBmp.SetPixel(1, height - (r * 10) - 1, Color.Blue);
                }
                // mark off upper bound and lower frequency bound
                sonoBmp.SetPixel(0, height - dominantBinMin, Color.Lime);
                sonoBmp.SetPixel(0, height - dominantBinMax, Color.Lime);
                sonoBmp.Save(filePath2);
            }
            // END DEBUG ################################ TEMPORARY ################################

            return(new LimnodynastesConvexResults
            {
                Sonogram = sonogram,
                Hits = null,
                Plot = plot,
                Events = acousticEvents,
                RecordingDuration = recording.Duration,
            });
        } // Analysis()
Example #28
0
        public static void GenerateSpectrograms()
        {
            var recordingDir = @"M:\Liz\SupervisedPatchSamplingSet\Recordings\";
            var resultDir    = @"M:\Liz\SupervisedPatchSamplingSet\";

            // check whether there is any file in the folder/subfolders
            if (Directory.GetFiles(recordingDir, "*", SearchOption.AllDirectories).Length == 0)
            {
                throw new ArgumentException("The folder of recordings is empty...");
            }

            int           frameSize     = 1024;
            int           finalBinCount = 256;
            FreqScaleType scaleType     = FreqScaleType.Mel;
            var           settings      = new SpectrogramSettings()
            {
                WindowSize = frameSize,

                // The duration of each frame (given the default frame size of 1024) is 0.04644 seconds.
                // The question is how many single frames (i.e., patch height equal to 1) should be selected to form one second.
                // The "WindowOverlap" is calculated to answer this question: with this overlap, 24 single frames span exactly one second
                // (assuming sr = 22050: 23 steps of ~914.17 samples plus one 1024-sample frame = 22050 samples,
                //  giving overlap = 1 - 914.17/1024 ≈ 0.10725204).
                // Note that the "WindowOverlap" value should be recalculated if the frame size changes;
                // this has not yet been considered in the Config file!
                WindowOverlap           = 0.10725204,
                DoMelScale              = scaleType == FreqScaleType.Mel,
                MelBinCount             = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            foreach (string filePath in Directory.GetFiles(recordingDir, "*.wav"))
            {
                FileInfo fileInfo = filePath.ToFileInfo();

                // process the wav file if it is not empty
                if (fileInfo.Length != 0)
                {
                    var recording = new AudioRecording(filePath);
                    settings.SourceFileName = recording.BaseName;

                    var amplitudeSpectrogram = new AmplitudeSpectrogram(settings, recording.WavReader);

                    var decibelSpectrogram = new DecibelSpectrogram(amplitudeSpectrogram);

                    // DO NOISE REDUCTION
                    decibelSpectrogram.Data = PcaWhitening.NoiseReduction(decibelSpectrogram.Data);

                    // draw the spectrogram
                    var attributes = new SpectrogramAttributes()
                    {
                        NyquistFrequency = decibelSpectrogram.Attributes.NyquistFrequency,
                        Duration         = decibelSpectrogram.Attributes.Duration,
                    };

                    Image  image = DecibelSpectrogram.DrawSpectrogramAnnotated(decibelSpectrogram.Data, settings, attributes);
                    string pathToSpectrogramFiles = Path.Combine(resultDir, "Spectrograms", settings.SourceFileName + ".bmp");
                    image.Save(pathToSpectrogramFiles);

                    // write the matrix to a csv file
                    string pathToMatrixFiles = Path.Combine(resultDir, "Matrices", settings.SourceFileName + ".csv");
                    Csv.WriteMatrixToCsv(pathToMatrixFiles.ToFileInfo(), decibelSpectrogram.Data);
                }
            }
        }
Example #29
0
        /// <summary>
        /// The CORE ANALYSIS METHOD.
        /// </summary>
        public static Tuple <BaseSonogram, double[, ], Plot, List <AcousticEvent>, TimeSpan> Analysis(FileInfo fiSegmentOfSourceFile, Dictionary <string, string> configDict, TimeSpan segmentStartOffset)
        {
            //set default values
            int frameLength = 1024;

            if (configDict.ContainsKey(AnalysisKeys.FrameLength))
            {
                frameLength = int.Parse(configDict[AnalysisKeys.FrameLength]);
            }

            double windowOverlap              = 0.0;
            int    minHz                      = int.Parse(configDict["MIN_HZ"]);
            int    minFormantgap              = int.Parse(configDict["MIN_FORMANT_GAP"]);
            int    maxFormantgap              = int.Parse(configDict["MAX_FORMANT_GAP"]);
            double decibelThreshold           = double.Parse(configDict["DECIBEL_THRESHOLD"]);   //dB
            double harmonicIntensityThreshold = double.Parse(configDict["INTENSITY_THRESHOLD"]); //in 0-1
            double callDuration               = double.Parse(configDict["CALL_DURATION"]);       // seconds

            AudioRecording recording = new AudioRecording(fiSegmentOfSourceFile.FullName);

            //i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName        = recording.BaseName,
                WindowSize         = frameLength,
                WindowOverlap      = windowOverlap,
                NoiseReductionType = SNR.KeyToNoiseReductionType("STANDARD"),
            }; //default values config

            TimeSpan tsRecordingDuration = recording.Duration;
            int      sr              = recording.SampleRate;
            double   freqBinWidth    = sr / (double)sonoConfig.WindowSize;
            double   framesPerSecond = freqBinWidth; // with zero overlap, frames/sec = sr/windowSize, which equals the bin width
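            // e.g., assuming sr = 17640 and window = 1024 (as in the comments below): freqBinWidth = 17640 / 1024 ≈ 17.23 Hz,
            // and with zero overlap the frame rate is also 17640 / 1024 ≈ 17.23 frames/s, hence the equality above.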

            //the Xcorrelation-FFT technique requires number of bins to scan to be power of 2.
            //assuming sr=17640 and window=1024, then  64 bins span 1100 Hz above the min Hz level. i.e. 500 to 1600
            //assuming sr=17640 and window=1024, then 128 bins span 2200 Hz above the min Hz level. i.e. 500 to 2700
            int numberOfBins = 64;
            int minBin       = (int)Math.Round(minHz / freqBinWidth) + 1;
            int maxbin       = minBin + numberOfBins - 1;
            int maxHz        = (int)Math.Round(minHz + (numberOfBins * freqBinWidth));

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount = sonogram.Data.GetLength(0);
            int          colCount = sonogram.Data.GetLength(1);

            double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, rowCount - 1, maxbin);

            int callSpan = (int)Math.Round(callDuration * framesPerSecond);
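            // e.g., if CALL_DURATION = 0.5 s and framesPerSecond ≈ 17.23, then callSpan = round(8.61) = 9 frames.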

            //#############################################################################################################################################
            //ii: DETECT HARMONICS
            var results = CrossCorrelation.DetectHarmonicsInSonogramMatrix(subMatrix, decibelThreshold, callSpan);

            double[] dBArray     = results.Item1;   // per-frame decibel values
            double[] intensity   = results.Item2;   // harmonic intensity scores (0-1)
            double[] periodicity = results.Item3;   // periodicity (formant gap) measured in freq bins

            //intensity = DataTools.filterMovingAverage(intensity, 3);
            int noiseBound = (int)(100 / freqBinWidth); //ignore 0-100 Hz - too much noise

            double[] scoreArray = new double[intensity.Length];
            for (int r = 0; r < rowCount; r++)
            {
                if (intensity[r] < harmonicIntensityThreshold)
                {
                    continue;
                }

                //ignore locations with incorrect formant gap
                double herzPeriod = periodicity[r] * freqBinWidth;
                if (herzPeriod < minFormantgap || herzPeriod > maxFormantgap)
                {
                    continue;
                }

                //find the freq having max power and use that info to adjust the score.
                //the discount zeroes frames whose dominant frequency is low (human voice and other low-freq noise; the cutoff here is 1200 Hz)
                double[] spectrum = MatrixTools.GetRow(sonogram.Data, r);
                for (int j = 0; j < noiseBound; j++)
                {
                    spectrum[j] = 0.0;
                }

                int    maxIndex         = DataTools.GetMaxIndex(spectrum);
                int    freqWithMaxPower = (int)Math.Round(maxIndex * freqBinWidth);
                double discount         = 1.0;
                if (freqWithMaxPower < 1200)
                {
                    discount = 0.0;
                }

                // the intensity threshold test was already applied at the top of this loop
                scoreArray[r] = intensity[r] * discount;
            }

            //transfer info to a hits matrix.
            var    hits      = new double[rowCount, colCount];
            double threshold = harmonicIntensityThreshold * 0.75; //reduced threshold for display of hits

            for (int r = 0; r < rowCount; r++)
            {
                if (scoreArray[r] < threshold)
                {
                    continue;
                }

                double herzPeriod = periodicity[r] * freqBinWidth;
                for (int c = minBin; c < maxbin; c++)
                {
                    //hits[r, c] = herzPeriod / (double)380;  //divide by 380 to get a relativePeriod;
                    hits[r, c] = (herzPeriod - minFormantgap) / maxFormantgap;  //to get a relativePeriod;
                }
            }

            //iii: CONVERT TO ACOUSTIC EVENTS
            double maxPossibleScore = 0.5; // assumed ceiling used to normalise event scores
            int    halfCallSpan     = callSpan / 2;
            var    predictedEvents  = new List <AcousticEvent>();

            for (int i = 0; i < rowCount; i++)
            {
                //assume one score position per crow call
                if (scoreArray[i] < 0.001)
                {
                    continue;
                }

                double        startTime = Math.Max(0, i - halfCallSpan) / framesPerSecond; // clamp so an early hit cannot produce a negative start time
                AcousticEvent ev        = new AcousticEvent(segmentStartOffset, startTime, callDuration, minHz, maxHz);
                ev.SetTimeAndFreqScales(framesPerSecond, freqBinWidth);
                ev.Score           = scoreArray[i];
                ev.ScoreNormalised = ev.Score / maxPossibleScore; // normalised relative to the assumed max possible score

                //ev.Score_MaxPossible = maxPossibleScore;
                predictedEvents.Add(ev);
            } //for loop

            Plot plot = new Plot("CROW", intensity, harmonicIntensityThreshold);

            return(Tuple.Create(sonogram, hits, plot, predictedEvents, tsRecordingDuration));
        } //Analysis()
Example #30
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            string speciesName            = configuration[AnalysisKeys.SpeciesName] ?? "<no species>";
            string abbreviatedSpeciesName = configuration[AnalysisKeys.AbbreviatedSpeciesName] ?? "<no.sp>";

            int minHz = configuration.GetInt(AnalysisKeys.MinHz);
            int maxHz = configuration.GetInt(AnalysisKeys.MaxHz);

            // BETTER TO CALCULATE THIS. IGNORE USER!
            // double frameOverlap = Double.Parse(configDict[Keys.FRAME_OVERLAP]);

            // duration of DCT in seconds
            double dctDuration = configuration.GetDouble(AnalysisKeys.DctDuration);

            // minimum acceptable value of a DCT coefficient
            double dctThreshold = configuration.GetDouble(AnalysisKeys.DctThreshold);

            // ignore oscillations below this threshold freq
            int minOscilFreq = configuration.GetInt(AnalysisKeys.MinOscilFreq);

            // ignore oscillations above this threshold freq
            int maxOscilFreq = configuration.GetInt(AnalysisKeys.MaxOscilFreq);

            // min duration of event in seconds
            double minDuration = configuration.GetDouble(AnalysisKeys.MinDuration);

            // max duration of event in seconds
            double maxDuration = configuration.GetDouble(AnalysisKeys.MaxDuration);

            // min score for an acceptable event
            double eventThreshold = configuration.GetDouble(AnalysisKeys.EventThreshold);

            if (recording.WavReader.SampleRate != 22050)
            {
                throw new InvalidOperationException("Requires a 22050Hz file");
            }

            // The default was 512 for Canetoad.
            // Set longer Framesize for calls having longer pulse periodicity.
            const int FrameSize     = 128;
            double    windowOverlap = Oscillations2012.CalculateRequiredFrameOverlap(
                recording.SampleRate,
                FrameSize,
                maxOscilFreq);

            //windowOverlap = 0.75; // previous default
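            // A plausible sketch of the overlap rule (an assumption; not taken from Oscillations2012 itself):
            // to sample an oscillation of maxOscilFreq Hz, the frame rate must be at least 2 * maxOscilFreq,
            // so frameStep <= sr / (2 * maxOscilFreq), i.e. windowOverlap = 1 - sr / (2.0 * maxOscilFreq * FrameSize).
            // e.g., sr = 22050, FrameSize = 128, maxOscilFreq = 100  =>  windowOverlap ≈ 1 - 110.25 / 128 ≈ 0.139.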

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = FrameSize,
                WindowOverlap = windowOverlap,

                //NoiseReductionType = NoiseReductionType.NONE,
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.1,
            };

            // sonoConfig.NoiseReductionType = SNR.Key2NoiseReductionType("STANDARD");
            TimeSpan     recordingDuration = recording.Duration;
            int          sr           = recording.SampleRate;
            double       freqBinWidth = sr / (double)sonoConfig.WindowSize;
            BaseSonogram sonogram     = new SpectrogramStandard(sonoConfig, recording.WavReader);
            int          rowCount     = sonogram.Data.GetLength(0);
            int          colCount     = sonogram.Data.GetLength(1);

            // double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, 0, minBin, (rowCount - 1), maxbin);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            // This window is used to smooth the score array before extracting events.
            // A short window (e.g. 3) preserves sharper score edges to define events but also keeps noise.
            int scoreSmoothingWindow = 13;

            Oscillations2012.Execute(
                (SpectrogramStandard)sonogram,
                minHz,
                maxHz,
                dctDuration,
                minOscilFreq,
                maxOscilFreq,
                dctThreshold,
                eventThreshold,
                minDuration,
                maxDuration,
                scoreSmoothingWindow,
                out var scores,
                out var oscillationEvents,
                out var hits,
                segmentStartOffset);

            var acousticEvents = oscillationEvents.ConvertSpectralEventsToAcousticEvents();

            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = speciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = abbreviatedSpeciesName;
            });

            var plot  = new Plot(this.DisplayName, scores, eventThreshold);
            var plots = new List <Plot> {
                plot
            };

            this.WriteDebugImage(recording, outputDirectory, sonogram, acousticEvents, plots, hits);

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plots,
                Events = acousticEvents,
            });
        }
        public static SpectralIndexValuesForContentDescription Analysis(
            AudioRecording recording,
            TimeSpan segmentOffsetTimeSpan,
            int sampleRateOfOriginalAudioFile,
            bool returnSonogramInfo = false)
        {
            // returnSonogramInfo = true; // if debugging
            double epsilon    = recording.Epsilon;
            int    sampleRate = recording.WavReader.SampleRate;

            //var segmentDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
            var indexCalculationDuration = TimeSpan.FromSeconds(ContentSignatures.IndexCalculationDurationInSeconds);

            // Get FRAME parameters for the calculation of Acoustic Indices
            int frameSize = ContentSignatures.FrameSize;
            int frameStep = frameSize;                                 // that is, windowOverlap = zero

            double frameStepDuration = frameStep / (double)sampleRate; // fraction of a second
            var    frameStepTimeSpan = TimeSpan.FromTicks((long)(frameStepDuration * TimeSpan.TicksPerSecond));

            // INITIALISE a RESULTS STRUCTURE TO return
            // initialize a result object in which to store SummaryIndexValues and SpectralIndexValues etc.
            var config          = new IndexCalculateConfig(); // sets some default values
            int freqBinCount    = frameSize / 2;
            var indexProperties = GetIndexProperties();
            var spectralIndices = new SpectralIndexValuesForContentDescription();


            // ################################## FINISHED SET-UP
            // ################################## NOW GET THE AMPLITUDE SPECTROGRAM

            // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT
            // Note that the amplitude spectrogram has had the DC bin removed. i.e. has only 256 columns.
            var dspOutput1           = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, frameStep);
            var amplitudeSpectrogram = dspOutput1.AmplitudeSpectrogram;

            // (B) ################################## EXTRACT OSC SPECTRAL INDEX DIRECTLY FROM THE RECORDING ##################################
            // Get the oscillation spectral index OSC separately from signal because need a different frame size etc.

            var sampleLength       = Oscillations2014.DefaultSampleLength;
            var frameLength        = Oscillations2014.DefaultFrameLength;
            var sensitivity        = Oscillations2014.DefaultSensitivityThreshold;
            var spectralIndexShort = Oscillations2014.GetSpectralIndex_Osc(recording, frameLength, sampleLength, sensitivity);

            // double the length of the vector because we want to work with a 256-element vector for spectrogram purposes
            spectralIndices.OSC = DataTools.VectorDoubleLengthByAverageInterpolation(spectralIndexShort);
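            // e.g., a 128-element OSC vector becomes 256 elements; presumably each inserted element is the
            // average of its two neighbours (hence "AverageInterpolation"), which preserves the spectral shape.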

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE AMPLITUDE SPECTROGRAM ##################################

            // IFF there has been UP-SAMPLING, calculate bin of the original audio nyquist. this will be less than SR/2.
            // original sample rate can be anything 11.0-44.1 kHz.
            int originalNyquist = sampleRateOfOriginalAudioFile / 2;

            // if up-sampling has been done
            if (dspOutput1.NyquistFreq > originalNyquist)
            {
                dspOutput1.NyquistFreq = originalNyquist;
                dspOutput1.NyquistBin  = (int)Math.Floor(originalNyquist / dspOutput1.FreqBinWidth); // note that bin width does not change
            }

            // ii: CALCULATE THE ACOUSTIC COMPLEXITY INDEX
            spectralIndices.ACI = AcousticComplexityIndex.CalculateAci(amplitudeSpectrogram);

            // iii: CALCULATE the H(t) or Temporal ENTROPY Spectrum and then reverse the values i.e. calculate 1-Ht for energy concentration
            double[] temporalEntropySpectrum = AcousticEntropy.CalculateTemporalEntropySpectrum(amplitudeSpectrogram);
            for (int i = 0; i < temporalEntropySpectrum.Length; i++)
            {
                temporalEntropySpectrum[i] = 1 - temporalEntropySpectrum[i];
            }

            spectralIndices.ENT = temporalEntropySpectrum;
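            // Interpretation: Ht ≈ 1 when a bin's energy is spread evenly over time (noise-like), so 1 - Ht ≈ 0;
            // a bin whose energy is concentrated in a few frames yields a value closer to 1.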

            // (C) ################################## EXTRACT SPECTRAL INDICES FROM THE DECIBEL SPECTROGRAM ##################################

            // i: Convert amplitude spectrogram to decibels and calculate the dB background noise profile
            double[,] decibelSpectrogram = MFCCStuff.DecibelSpectra(dspOutput1.AmplitudeSpectrogram, dspOutput1.WindowPower, sampleRate, epsilon);
            double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(decibelSpectrogram);
            spectralIndices.BGN = spectralDecibelBgn;

            // ii: Calculate the noise reduced decibel spectrogram derived from the segment recording.
            //     REUSE the decibelSpectrogram computed in step i (recomputing it from dspOutput1 would yield an identical matrix).
            decibelSpectrogram = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogram, spectralDecibelBgn);
            decibelSpectrogram = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogram, nhThreshold: 2.0);

            // iii: CALCULATE noise reduced AVERAGE DECIBEL SPECTRUM
            spectralIndices.PMN = SpectrogramTools.CalculateAvgDecibelSpectrumFromDecibelSpectrogram(decibelSpectrogram);

            // ######################################################################################################################################################
            // iv: CALCULATE SPECTRAL COVER. NOTE: at this point, decibelSpectrogram is noise reduced. All values >= 0.0
            //           FreqBinWidth can be accessed, if required, through dspOutput1.FreqBinWidth
            // dB THRESHOLD for calculating spectral coverage
            double dBThreshold = ActivityAndCover.DefaultActivityThresholdDb;

            // Calculate lower and upper boundary bin ids.
            // The boundary between the low & mid frequency bands is chosen to avoid low-freq bins containing anthropogenic noise,
            // which would bias the index values away from biophony.
            int midFreqBound   = config.MidFreqBound;
            int lowFreqBound   = config.LowFreqBound;
            int lowerBinBound  = (int)Math.Ceiling(lowFreqBound / dspOutput1.FreqBinWidth);
            int middleBinBound = (int)Math.Ceiling(midFreqBound / dspOutput1.FreqBinWidth);
            var spActivity     = ActivityAndCover.CalculateSpectralEvents(decibelSpectrogram, dBThreshold, frameStepTimeSpan, lowerBinBound, middleBinBound);
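            // e.g., assuming lowFreqBound = 1000 Hz, midFreqBound = 8000 Hz and FreqBinWidth ≈ 43.07 Hz:
            // lowerBinBound = ceil(23.2) = 24 and middleBinBound = ceil(185.8) = 186.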

            //spectralIndices.CVR = spActivity.CoverSpectrum;
            spectralIndices.EVN = spActivity.EventSpectrum;

            return(spectralIndices);
        } // end calculation of Six Spectral Indices
Example #32
0
        /// <summary>
        /// Do your analysis. This method is called once per segment (typically one-minute segments).
        /// </summary>
        /// <param name="recording"></param>
        /// <param name="configuration"></param>
        /// <param name="segmentStartOffset"></param>
        /// <param name="getSpectralIndexes"></param>
        /// <param name="outputDirectory"></param>
        /// <param name="imageWidth"></param>
        /// <returns></returns>
        public override RecognizerResults Recognize(AudioRecording recording, Config configuration, TimeSpan segmentStartOffset, Lazy <IndexCalculateResult[]> getSpectralIndexes, DirectoryInfo outputDirectory, int?imageWidth)
        {
            var recognizerConfig = new LitoriaNasutaConfig();

            recognizerConfig.ReadConfigFile(configuration);

            // BETTER TO SET THESE. IGNORE USER!
            // this default framesize seems to work
            const int    frameSize     = 1024;
            const double windowOverlap = 0.0;

            // i: MAKE SONOGRAM
            var sonoConfig = new SonogramConfig
            {
                SourceFName   = recording.BaseName,
                WindowSize    = frameSize,
                WindowOverlap = windowOverlap,

                // use the default HAMMING window
                //WindowFunction = WindowFunctions.HANNING.ToString(),
                //WindowFunction = WindowFunctions.NONE.ToString(),

                // if do not use noise reduction can get a more sensitive recogniser.
                //NoiseReductionType = NoiseReductionType.None
                NoiseReductionType      = NoiseReductionType.Standard,
                NoiseReductionParameter = 0.0,
            };

            TimeSpan recordingDuration = recording.WavReader.Time;
            int      sr               = recording.SampleRate;
            double   freqBinWidth     = sr / (double)sonoConfig.WindowSize;
            int      minBin           = (int)Math.Round(recognizerConfig.MinHz / freqBinWidth) + 1;
            int      maxBin           = (int)Math.Round(recognizerConfig.MaxHz / freqBinWidth) + 1;
            var      decibelThreshold = 3.0;

            BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // ######################################################################
            // ii: DO THE ANALYSIS AND RECOVER SCORES OR WHATEVER
            int rowCount = sonogram.Data.GetLength(0);

            double[] amplitudeArray = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, rowCount - 1, maxBin);

            //double[] topBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, maxBin + 3, (rowCount - 1), maxBin + 9);
            //double[] botBand = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin - 3, (rowCount - 1), minBin - 9);

            // convert the amplitude array to acoustic events
            var acousticEvents = AcousticEvent.ConvertScoreArray2Events(
                amplitudeArray,
                recognizerConfig.MinHz,
                recognizerConfig.MaxHz,
                sonogram.FramesPerSecond,
                freqBinWidth,
                decibelThreshold,
                recognizerConfig.MinDuration,
                recognizerConfig.MaxDuration,
                segmentStartOffset);

            double[,] hits = null;

            acousticEvents.ForEach(ae =>
            {
                ae.SpeciesName            = recognizerConfig.SpeciesName;
                ae.SegmentDurationSeconds = recordingDuration.TotalSeconds;
                ae.SegmentStartSeconds    = segmentStartOffset.TotalSeconds;
                ae.Name = recognizerConfig.AbbreviatedSpeciesName;
            });

            var thresholdedPlot = new double[amplitudeArray.Length];

            for (int x = 0; x < amplitudeArray.Length; x++)
            {
                if (amplitudeArray[x] > decibelThreshold)
                {
                    thresholdedPlot[x] = amplitudeArray[x];
                }
            }

            var maxDb = amplitudeArray.MaxOrDefault();

            double[] normalisedScores;
            double   normalisedThreshold;

            DataTools.Normalise(thresholdedPlot, decibelThreshold, out normalisedScores, out normalisedThreshold);
            var text = $"{this.DisplayName} (Fullscale={maxDb:f1}dB)";
            var plot = new Plot(text, normalisedScores, normalisedThreshold);

            // display a variety of debug score arrays; set false for deployment
            bool displayDebugImage = true;
            if (displayDebugImage)
            {
                DataTools.Normalise(amplitudeArray, decibelThreshold, out normalisedScores, out normalisedThreshold);
                var amplPlot = new Plot("Band amplitude", normalisedScores, normalisedThreshold);

                var debugPlots = new List <Plot> {
                    plot, amplPlot
                };

                // NOTE: This DrawDebugImage() method can be over-written in this class.
                var debugImage = DrawDebugImage(sonogram, acousticEvents, debugPlots, hits);
                var debugPath  = FilenameHelpers.AnalysisResultPath(outputDirectory, recording.BaseName, this.SpeciesName, "png", "DebugSpectrogram");
                debugImage.Save(debugPath);
            }

            return(new RecognizerResults()
            {
                Sonogram = sonogram,
                Hits = hits,
                Plots = plot.AsList(),
                Events = acousticEvents,
            });
        }
        public void OctaveFrequencyScale2()
        {
            var recordingPath   = PathHelper.ResolveAsset(@"Recordings\MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit-60sec.wav");
            var opFileStem      = "JascoMarineGBR1";
            var outputDir       = this.outputDirectory;
            var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Octave2ScaleSonogram.png");

            var recording = new AudioRecording(recordingPath);
            var fst       = FreqScaleType.Linear125Octaves7Tones28Nyquist32000;
            var freqScale = new FrequencyScale(fst);

            var sonoConfig = new SonogramConfig
            {
                WindowSize              = freqScale.WindowSize,
                WindowOverlap           = 0.2,
                SourceFName             = recording.BaseName,
                NoiseReductionType      = NoiseReductionType.None,
                NoiseReductionParameter = 0.0,
            };

            var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);

            sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale);

            // DO NOISE REDUCTION
            var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);

            sonogram.Data = dataMatrix;
            sonogram.Configuration.WindowSize = freqScale.WindowSize;

            var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);

            image.Save(outputImagePath, ImageFormat.Png);

            // DO FILE EQUALITY TESTS
            // Check that freqScale.OctaveBinBounds are correct
            var stemOfExpectedFile = opFileStem + "_Octave2ScaleBinBounds.EXPECTED.json";
            var stemOfActualFile   = opFileStem + "_Octave2ScaleBinBounds.ACTUAL.json";
            var expectedFile1      = PathHelper.ResolveAsset("FrequencyScale\\" + stemOfExpectedFile);

            if (!expectedFile1.Exists)
            {
                LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
                LoggedConsole.WriteErrorLine(
                    $"If ACTUAL results file is correct, move it to dir `{PathHelper.TestResources}` and change its suffix to <.EXPECTED.json>");
            }

            var resultFile1 = new FileInfo(Path.Combine(outputDir.FullName, stemOfActualFile));

            Json.Serialise(resultFile1, freqScale.BinBounds);
            FileEqualityHelpers.TextFileEqual(expectedFile1, resultFile1);

            // Check that freqScale.GridLineLocations are correct
            stemOfExpectedFile = opFileStem + "_Octave2ScaleGridLineLocations.EXPECTED.json";
            stemOfActualFile   = opFileStem + "_Octave2ScaleGridLineLocations.ACTUAL.json";
            var expectedFile2 = PathHelper.ResolveAsset("FrequencyScale\\" + stemOfExpectedFile);

            if (!expectedFile2.Exists)
            {
                LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
                LoggedConsole.WriteErrorLine(
                    $"If ACTUAL results file is correct, move it to dir `{PathHelper.TestResources}` and change its suffix to <.EXPECTED.json>");
            }

            var resultFile2 = new FileInfo(Path.Combine(outputDir.FullName, stemOfActualFile));

            Json.Serialise(resultFile2, freqScale.GridLineLocations);
            FileEqualityHelpers.TextFileEqual(expectedFile2, resultFile2);

            // Check that image dimensions are correct
            Assert.AreEqual(201, image.Width);
            Assert.AreEqual(310, image.Height);
        }