/// <summary>
/// Draws a spectrogram using an explicitly specified linear frequency scale
/// (nyquist 11025 Hz, frame 1024, gridlines every 1000 Hz), compares the derived
/// grid-line locations against a stored EXPECTED json asset, and checks the
/// final image dimensions.
/// </summary>
public void LinearFrequencyScale()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var opFileStem = "BAC2_20071008";
    var outputDir = this.outputDirectory;
    var outputImagePath = Path.Combine(outputDir.FullName, "LinearScaleSonogram.png");
    var recording = new AudioRecording(recordingPath);

    // specified linear scale
    int nyquist = 11025;
    int frameSize = 1024;
    int hertzInterval = 1000;
    var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst, freqScale.GridLineLocations);
    image.Save(outputImagePath, ImageFormat.Png);

    // DO FILE EQUALITY TEST
    var stemOfExpectedFile = opFileStem + "_LinearScaleGridLineLocations.EXPECTED.json";
    var stemOfActualFile = opFileStem + "_LinearScaleGridLineLocations.ACTUAL.json";

    // Check that freqScale.GridLineLocations are correct.
    // FIX: resolve the asset with the multi-segment overload (as done for the
    // Recordings asset above) instead of a hard-coded Windows "\\" separator.
    var expectedFile1 = PathHelper.ResolveAsset("FrequencyScale", stemOfExpectedFile);
    if (!expectedFile1.Exists)
    {
        LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
        LoggedConsole.WriteErrorLine(
            $"If ACTUAL results file is correct, move it to dir `{PathHelper.TestResources}` and change its suffix to <.EXPECTED.json>");
    }

    var resultFile1 = new FileInfo(Path.Combine(outputDir.FullName, stemOfActualFile));
    Json.Serialise(resultFile1, freqScale.GridLineLocations);
    FileEqualityHelpers.TextFileEqual(expectedFile1, resultFile1);

    // Check that image dimensions are correct
    Assert.AreEqual(566, image.Height);
    Assert.AreEqual(1621, image.Width);
}
/// <summary>
/// Draws a spectrogram using the default linear frequency scale, compares the
/// scale's grid-line locations with a stored EXPECTED json file, and checks the
/// resulting image dimensions.
/// </summary>
public void LinearFrequencyScaleDefault()
{
    // relative path because post-Build command transfers files to ...\\Work\GitHub\...\bin\Debug subfolder.
    var wavPath = @"Recordings\BAC2_20071008-085040.wav";
    var fileStem = "BAC2_20071008";
    var resultsDir = this.outputDirectory;
    var imagePath = Path.Combine(resultsDir.FullName, "DefaultLinearScaleSonogram.png");
    var audio = new AudioRecording(wavPath);

    // default linear scale
    var scaleType = FreqScaleType.Linear;
    var scale = new FrequencyScale(scaleType);

    var config = new SonogramConfig
    {
        WindowSize = scale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = audio.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var spectrogram = new SpectrogramStandard(config, audio.WavReader);
    spectrogram.Configuration.WindowSize = scale.WindowSize;

    // DO NOISE REDUCTION
    spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data);

    var annotated = spectrogram.GetImageFullyAnnotated(spectrogram.GetImage(), "SPECTROGRAM: " + scaleType.ToString(), scale.GridLineLocations);
    annotated.Save(imagePath, ImageFormat.Png);

    // DO UNIT TESTING
    var expectedStem = fileStem + "_DefaultLinearScaleGridLineLocations.EXPECTED.json";
    var actualStem = fileStem + "_DefaultLinearScaleGridLineLocations.ACTUAL.json";

    // Check that freqScale.GridLineLocations are correct
    var expectedFile = new FileInfo("FrequencyScale\\" + expectedStem);
    if (!expectedFile.Exists)
    {
        LoggedConsole.WriteErrorLine("An EXPECTED results file does not exist. Test will fail!");
        LoggedConsole.WriteErrorLine("If ACTUAL results file is correct, move it to dir <...\\TestResources\\FrequencyScale> and change its suffix to <.EXPECTED.json>");
    }

    var actualFile = new FileInfo(Path.Combine(resultsDir.FullName, actualStem));
    Json.Serialise(actualFile, scale.GridLineLocations);
    FileEqualityHelpers.TextFileEqual(expectedFile, actualFile);

    // Check that image dimensions are correct
    Assert.AreEqual(310, annotated.Height);
    Assert.AreEqual(3247, annotated.Width);
}
/// <summary>
/// Draws a spectrogram using an explicitly specified linear frequency scale
/// and asserts the exact grid-line locations and image dimensions.
/// </summary>
public void LinearFrequencyScale()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");

    // FIX: the output name was copy-pasted from the default-scale test
    // ("DefaultLinearScaleSonogram.png"); both tests write into the same output
    // directory, so the two debug images clobbered each other.
    var outputImagePath = this.outputDirectory.CombineFile("LinearScaleSonogram.png");
    var recording = new AudioRecording(recordingPath);

    // specified linear scale
    int nyquist = 11025;
    int frameSize = 1024;
    int hertzInterval = 1000;
    var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // expected grid-line locations: { image row, hertz label } pairs
    var expected = new[,]
    {
        { 46, 1000 },
        { 92, 2000 },
        { 139, 3000 },
        { 185, 4000 },
        { 232, 5000 },
        { 278, 6000 },
        { 325, 7000 },
        { 371, 8000 },
        { 417, 9000 },
        { 464, 10000 },
        { 510, 11000 },
    };

    Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations);

    // Check that image dimensions are correct
    Assert.AreEqual(566, image.Height);
    Assert.AreEqual(1621, image.Width);
}
/// <summary>
/// Draws a spectrogram using the default linear frequency scale and asserts the
/// exact grid-line locations and image dimensions.
/// </summary>
public void LinearFrequencyScaleDefault()
{
    var wavPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var imagePath = this.outputDirectory.CombineFile("DefaultLinearScaleSonogram.png");
    var audio = new AudioRecording(wavPath);

    // default linear scale
    var scaleType = FreqScaleType.Linear;
    var scale = new FrequencyScale(scaleType);

    var config = new SonogramConfig
    {
        WindowSize = scale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = audio.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var spectrogram = new SpectrogramStandard(config, audio.WavReader);
    spectrogram.Configuration.WindowSize = scale.WindowSize;

    // DO NOISE REDUCTION
    spectrogram.Data = SNR.NoiseReduce_Standard(spectrogram.Data);

    var annotated = spectrogram.GetImageFullyAnnotated(spectrogram.GetImage(), "SPECTROGRAM: " + scaleType, scale.GridLineLocations);
    annotated.Save(imagePath);

    // Check that freqScale.GridLineLocations are correct: { image row, hertz label } pairs
    var expectedGridLines = new[,]
    {
        { 23, 1000 },
        { 46, 2000 },
        { 69, 3000 },
        { 92, 4000 },
        { 116, 5000 },
        { 139, 6000 },
        { 162, 7000 },
        { 185, 8000 },
        { 208, 9000 },
        { 232, 10000 },
        { 255, 11000 },
    };

    Assert.That.MatricesAreEqual(expectedGridLines, scale.GridLineLocations);

    // Check that image dimensions are correct
    Assert.AreEqual(310, annotated.Height);
    Assert.AreEqual(3247, annotated.Width);
}
/// <summary>
/// METHOD TO CHECK IF SPECIFIED linear FREQ SCALE IS WORKING
/// Check it on standard one minute recording.
/// </summary>
public static void TESTMETHOD_LinearFrequencyScale()
{
    var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC2_20071008-085040.wav";
    var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo();
    var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo();
    var outputImagePath = Path.Combine(outputDir.FullName, "linearScaleSonogram.png");
    var opFileStem = "BAC2_20071008";
    var recording = new AudioRecording(recordingPath);

    // specified linear scale
    int nyquist = 11025;
    int frameSize = 1024;
    int hertzInterval = 1000;
    var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // DO NOISE REDUCTION
    var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data);
    sonogram.Data = dataMatrix;
    sonogram.Configuration.WindowSize = freqScale.WindowSize;

    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // DO FILE EQUALITY TEST
    // FIX: replaced the placeholder name "testName" with a descriptive one,
    // consistent with TESTMETHOD_MelFrequencyScale's "MelTest".
    string testName = "LinearScaleTest";
    var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyLinearScaleTest.EXPECTED.json"));
    var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyLinearScaleTestResults.json"));

    // NOTE(review): results are written as CSV into a ".json"-named file; the
    // EXPECTED file is presumably produced the same way — confirm before renaming.
    Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations);
    TestTools.FileEqualityTest(testName, resultFile, expectedTestFile);
    LoggedConsole.WriteLine("Completed Linear Frequency Scale test");
    Console.WriteLine("\n\n");
}
/// <summary>
/// METHOD TO CHECK IF SPECIFIED MEL FREQ SCALE IS WORKING
/// Check it on standard one minute recording.
/// </summary>
public static void TESTMETHOD_MelFrequencyScale()
{
    var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC2_20071008-085040.wav";
    var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo();
    var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo();
    var outputImagePath = Path.Combine(outputDir.FullName, "melScaleSonogram.png");
    var opFileStem = "BAC2_20071008";
    var recording = new AudioRecording(recordingPath);

    int nyquist = recording.Nyquist;
    int frameSize = 1024;
    int finalBinCount = 256;
    int hertzInterval = 1000;
    FreqScaleType scaleType = FreqScaleType.Mel;
    var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = frameSize,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,

        // FIX: simplified the redundant `(x == y) ? true : false` ternary.
        DoMelScale = scaleType == FreqScaleType.Mel,
        MelBinCount = scaleType == FreqScaleType.Mel ? finalBinCount : frameSize / 2,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // DRAW SPECTROGRAM
    var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst, freqScale.GridLineLocations);
    image.Save(outputImagePath);

    // DO FILE EQUALITY TEST
    string testName = "MelTest";
    var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "MelFrequencyScaleTest.EXPECTED.json"));
    var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "MelFrequencyLinearScaleTestResults.json"));
    Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations);
    TestTools.FileEqualityTest(testName, resultFile, expectedTestFile);
    LoggedConsole.WriteLine("Completed Mel Frequency Scale test");
    Console.WriteLine("\n\n");
}
/// <summary>
/// Generate a Spectrogram.
/// </summary>
/// <param name="bytes">
/// The bytes.
/// </param>
/// <returns>
/// Spectrogram image.
/// </returns>
/// <exception cref="NotSupportedException">Thrown when the audio is not 22050 Hz.</exception>
public Bitmap Spectrogram(byte[] bytes)
{
    /*
     * 80 pixels per second is too quick for Silverlight.
     * we want to use 40 pixels per second (half - window size of 0)
     */
    var config = new SonogramConfig
    {
        WindowOverlap = 0, // was 0.5 when ppms was 0.08
        WindowSize = 512,
        DoSnr = false, // might save us some time generating spectrograms.
    };

    using (var audioRecording = new AudioRecording(bytes))
    {
        // audiorecording.ConvertSampleRate22kHz(); // THIS METHOD CALL IS OBSOLETE
        if (audioRecording.SampleRate != 22050)
        {
            var msg = string.Format(
                "Must be able to convert audio to 22050hz. Audio has sample rate of {0}.",
                audioRecording.SampleRate);
            throw new NotSupportedException(msg);
        }

        using (var sonogram = new SpectrogramStandard(config, audioRecording.WavReader))
        using (var img = sonogram.GetImage())
        {
            // copy before the sonogram image is disposed
            return new Bitmap(img);
        }
    }
}
/// <summary>
/// THE KEY ANALYSIS METHOD for Limnodynastes convex.
/// Scans the hi-resolution RHZ spectral-index matrix for candidate dominant peaks,
/// then confirms each candidate in a standard decibel spectrogram by searching for
/// the three equally spaced peaks characteristic of the call.
/// </summary>
/// <param name="dictionaryOfHiResSpectralIndices">Hi-res spectral index matrices keyed by index name; "RHZ" is required.</param>
/// <param name="recording">The segment Of Source File.</param>
/// <param name="configDict">The config Dict.</param>
/// <param name="analysisSettings">Analysis-wide settings (currently not read directly by this method).</param>
/// <param name="segmentSettings">Per-segment settings: output directory, start offset and audio file name.</param>
/// <returns>
/// The <see cref="LimnodynastesConvexResults"/>.
/// </returns>
internal static LimnodynastesConvexResults Analysis(
    Dictionary<string, double[,]> dictionaryOfHiResSpectralIndices,
    AudioRecording recording,
    Dictionary<string, string> configDict,
    AnalysisSettings analysisSettings,
    SegmentSettingsBase segmentSettings)
{
    // for Limnodynastes convex, in the D.Stewart CD, there are peaks close to:
    //1. 1950 Hz
    //2. 1460 hz
    //3. 970 hz These are 490 Hz apart.
    // for Limnodynastes convex, in the JCU recording, there are peaks close to:
    //1. 1780 Hz
    //2. 1330 hz
    //3. 880 hz These are 450 Hz apart.
    // So strategy is to look for three peaks separated by same amount and in the vicinity of the above,
    // starting with highest power (the top peak) and working down to lowest power (bottom peak).
    var outputDir = segmentSettings.SegmentOutputDirectory;
    TimeSpan segmentStartOffset = segmentSettings.SegmentStartOffset;

    var spg = dictionaryOfHiResSpectralIndices["RHZ"];
    int rhzRowCount = spg.GetLength(0);
    int rhzColCount = spg.GetLength(1);
    int sampleRate = recording.SampleRate;
    double herzPerBin = sampleRate / 2 / (double)rhzRowCount;

    // Config readers. FIX: the original `(double?)double.Parse(x) ?? default` pattern
    // could never fall back to its default (Parse does not return null) and threw
    // when the key was absent; TryGetValue + TryParse makes the defaults reachable.
    double GetConfigDouble(string key, double defaultValue) =>
        configDict.TryGetValue(key, out var text) && double.TryParse(text, out var value) ? value : defaultValue;

    int GetConfigInt(string key, int defaultValue) =>
        configDict.TryGetValue(key, out var text) && int.TryParse(text, out var value) ? value : defaultValue;

    double scoreThreshold = GetConfigDouble("EventThreshold", 3.0);
    int minimumFrequency = GetConfigInt("MinHz", 850);
    int dominantFrequency = GetConfigInt("DominantFrequency", 1850);

    // # The Limnodynastes call has three major peaks. The dominant peak is at 1850 or as set above.
    // # The second and third peak are at equal gaps below. DominantFreq-gap and DominantFreq-(2*gap);
    // # Set the gap in the Config file. Should typically be in range 880 to 970
    int peakGapInHerz = GetConfigInt("PeakGap", 470);

    int f1AndF2Gap = (int)Math.Round(peakGapInHerz / herzPerBin);
    int f1AndF3Gap = 2 * f1AndF2Gap;

    int hzBuffer = 250;
    int bottomBin = 5;
    int dominantBin = (int)Math.Round(dominantFrequency / herzPerBin);
    int binBuffer = (int)Math.Round(hzBuffer / herzPerBin); // FIX: removed stray double semicolon
    int dominantBinMin = dominantBin - binBuffer;
    int dominantBinMax = dominantBin + binBuffer;

    // freqBin + rowID = binCount - 1;
    // therefore: rowID = binCount - freqBin - 1;
    int minRowId = rhzRowCount - dominantBinMax - 1;
    int maxRowId = rhzRowCount - dominantBinMin - 1;
    int bottomRow = rhzRowCount - bottomBin - 1;

    var list = new List<Point>();

    // loop through all spectra/columns of the hi-res spectrogram.
    for (int c = 1; c < rhzColCount - 1; c++)
    {
        double maxAmplitude = -double.MaxValue;
        int idOfRowWithMaxAmplitude = 0;

        for (int r = minRowId; r <= bottomRow; r++)
        {
            if (spg[r, c] > maxAmplitude)
            {
                maxAmplitude = spg[r, c];
                idOfRowWithMaxAmplitude = r;
            }
        }

        // the maximum must fall inside the dominant-frequency band
        if (idOfRowWithMaxAmplitude < minRowId)
        {
            continue;
        }

        if (idOfRowWithMaxAmplitude > maxRowId)
        {
            continue;
        }

        // want a spectral peak.
        if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c - 1])
        {
            continue;
        }

        if (spg[idOfRowWithMaxAmplitude, c] < spg[idOfRowWithMaxAmplitude, c + 1])
        {
            continue;
        }

        // peak should exceed threshold amplitude
        // NOTE(review): 3.0 is hard-coded here rather than using scoreThreshold — confirm this is deliberate.
        if (spg[idOfRowWithMaxAmplitude, c] < 3.0)
        {
            continue;
        }

        // convert row ID to freq bin ID
        int freqBinId = rhzRowCount - idOfRowWithMaxAmplitude - 1;
        list.Add(new Point(c, freqBinId));

        // we now have a list of potential hits for LimCon. This needs to be filtered.
        // Console.WriteLine("Col {0}, Bin {1} ", c, freqBinID);
    }

    // DEBUG ONLY ################################ TEMPORARY ################################
    // superimpose point on RHZ HiRes spectrogram for debug purposes
    bool drawOnHiResSpectrogram = true;
    var fileName = Path.GetFileNameWithoutExtension(segmentSettings.SegmentAudioFile.Name);
    string filePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScale.png";
    var debugImage = new FileInfo(filePath);
    if (!debugImage.Exists)
    {
        drawOnHiResSpectrogram = false;
    }

    if (drawOnHiResSpectrogram)
    {
        // put red dot where max is
        Bitmap bmp = new Bitmap(filePath);
        foreach (Point point in list)
        {
            bmp.SetPixel(point.X + 70, 1911 - point.Y, Color.Red);
        }

        // mark off every tenth frequency bin
        for (int r = 0; r < 26; r++)
        {
            bmp.SetPixel(68, 1911 - (r * 10), Color.Blue);
            bmp.SetPixel(69, 1911 - (r * 10), Color.Blue);
        }

        // mark off upper bound and lower frequency bound
        bmp.SetPixel(69, 1911 - dominantBinMin, Color.Lime);
        bmp.SetPixel(69, 1911 - dominantBinMax, Color.Lime);
        string opFilePath = outputDir.FullName + @"\SpectrogramImages\" + fileName + ".CombinedGreyScaleAnnotated.png";
        bmp.Save(opFilePath);
    }

    // END DEBUG ################################ TEMPORARY ################################

    // now construct the standard decibel spectrogram WITHOUT noise removal, and look for LimConvex
    // get frame parameters for the analysis
    // (the unused `epsilon` local, needed only by a commented-out alternative, was removed)
    int frameSize = rhzRowCount * 2;
    int frameStep = frameSize; // this default = zero overlap
    double frameDurationInSeconds = frameSize / (double)sampleRate;
    double frameStepInSeconds = frameStep / (double)sampleRate;
    double framesPerSec = 1 / frameStepInSeconds;

    // i: Init SONOGRAM config
    var sonoConfig = new SonogramConfig
    {
        SourceFName = recording.BaseName,
        WindowSize = frameSize,
        WindowOverlap = 0.0,
        NoiseReductionType = NoiseReductionType.None,
    };

    // init sonogram
    BaseSonogram sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

    // remove the DC row of the spectrogram
    sonogram.Data = MatrixTools.Submatrix(sonogram.Data, 0, 1, sonogram.Data.GetLength(0) - 1, sonogram.Data.GetLength(1) - 1);

    // create new list of LimCon hits in the standard spectrogram.
    double timeSpanOfFrameInSeconds = frameSize / (double)sampleRate;
    var newList = new List<int[]>();
    int lastFrameId = sonogram.Data.GetLength(0) - 1;
    int lastBinId = sonogram.Data.GetLength(1) - 1;

    foreach (Point point in list)
    {
        // convert point.X (hi-res 0.1 s time block) to centre of time-block in seconds.
        double secondsFromStartOfSegment = (point.X * 0.1) + 0.05;
        int framesFromStartOfSegment = (int)Math.Round(secondsFromStartOfSegment / timeSpanOfFrameInSeconds);

        // location of max point is uncertain, so search in neighbourhood.
        // NOTE: sonogram.data matrix is time*freqBin
        double maxValue = -double.MaxValue;
        int idOfTMax = framesFromStartOfSegment;
        int idOfFMax = point.Y;
        for (int deltaT = -4; deltaT <= 4; deltaT++)
        {
            for (int deltaF = -1; deltaF <= 1; deltaF++)
            {
                int newT = framesFromStartOfSegment + deltaT;
                if (newT < 0)
                {
                    newT = 0;
                }
                else if (newT > lastFrameId)
                {
                    newT = lastFrameId;
                }

                // FIX: clamp the frequency index too — `lastBinId` was computed but
                // never used, and point.Y +/- 1 could index outside the matrix.
                // Also record the clamped indices; the original stored the unclamped
                // ones, which could later drive a sub-matrix out of range.
                int newF = point.Y + deltaF;
                if (newF < 0)
                {
                    newF = 0;
                }
                else if (newF > lastBinId)
                {
                    newF = lastBinId;
                }

                double value = sonogram.Data[newT, newF];
                if (value > maxValue)
                {
                    maxValue = value;
                    idOfTMax = newT;
                    idOfFMax = newF;
                }
            }
        }

        newList.Add(new[] { idOfTMax, idOfFMax });
    }

    // Now obtain more of spectrogram to see if have peaks at two other places characteristic of Limnodynastes convex.
    // We have found top/highest peak - now find the other two (expected near 1330-1460 Hz and 880-970 Hz;
    // dead locals secondDominantBin/thirdDominantBin were removed as they were never read).
    var acousticEvents = new List<AcousticEvent>();
    int tBuffer = 2;

    // First extract a sub-matrix.
    foreach (int[] array in newList)
    {
        // NOTE: sonogram.data matrix is time*freqBin
        int tFrame = array[0];
        int f1Bin = array[1];

        // NOTE(review): tFrame - tBuffer (and tFrame - 3 below) can be negative for
        // hits very close to the segment start — confirm Submatrix tolerates this.
        double[,] subMatrix = MatrixTools.Submatrix(sonogram.Data, tFrame - tBuffer, 0, tFrame + tBuffer, f1Bin);
        double f1Power = subMatrix[tBuffer, f1Bin];

        // convert to vector
        var spectrum = MatrixTools.GetColumnAverages(subMatrix);

        // use the following code to get estimate of background noise
        double[,] powerMatrix = MatrixTools.Submatrix(sonogram.Data, tFrame - 3, 10, tFrame + 3, f1Bin);
        double averagePower = MatrixTools.GetRowAverages(powerMatrix).Average();
        double score = f1Power - averagePower;

        // locate the peaks in lower frequency bands, F2 and F3
        bool[] peaks = DataTools.GetPeaks(spectrum);
        int f2Bin = 0;
        double f2Power = -200.0; // dB
        for (int i = -3; i <= 2; i++)
        {
            int bin = f1Bin - f1AndF2Gap + i;

            // FIX: guard against indexing outside the peaks array
            if (bin < 0 || bin >= peaks.Length)
            {
                continue;
            }

            if (peaks[bin] && f2Power < subMatrix[1, bin])
            {
                f2Bin = bin;
                f2Power = subMatrix[1, bin];
            }
        }

        if (f2Bin == 0)
        {
            continue;
        }

        if (f2Power == -200.0)
        {
            continue;
        }

        score += f2Power - averagePower;

        int f3Bin = 0;
        double f3Power = -200.0;
        for (int i = -5; i <= 2; i++)
        {
            int bin = f1Bin - f1AndF3Gap + i;

            // FIX: guard against indexing outside the peaks array
            if (bin < 0 || bin >= peaks.Length)
            {
                continue;
            }

            if (peaks[bin] && f3Power < subMatrix[1, bin])
            {
                f3Bin = bin;
                f3Power = subMatrix[1, bin];
            }
        }

        if (f3Bin == 0)
        {
            continue;
        }

        if (f3Power == -200.0)
        {
            continue;
        }

        score += f3Power - averagePower;
        score /= 3;

        // ignore events where SNR < decibel threshold
        if (score < scoreThreshold)
        {
            continue;
        }

        // ignore events with wrong power distribution. A good LimnoConvex call has strongest F1 power
        if (f3Power > f1Power || f2Power > f1Power)
        {
            continue;
        }

        // (dead reassignments of minRowID/maxRowID and unused F1/F2/F3 RowID locals removed)
        int maxfreq = dominantFrequency + hzBuffer;
        int topBin = (int)Math.Round(maxfreq / herzPerBin);
        int frameCount = 4;
        double duration = frameCount * frameStepInSeconds;
        double startTimeWrtSegment = (tFrame - 2) * frameStepInSeconds;

        // Got to here so start initialising an acoustic event
        var ae = new AcousticEvent(segmentStartOffset, startTimeWrtSegment, duration, minimumFrequency, maxfreq);
        ae.SetTimeAndFreqScales(framesPerSec, herzPerBin);

        // record the three peak locations relative to the event's top frequency bin
        var pointF1 = new Point(2, topBin - f1Bin);
        var pointF2 = new Point(2, topBin - f2Bin);
        var pointF3 = new Point(2, topBin - f3Bin);
        ae.Points = new List<Point> { pointF1, pointF2, pointF3 };
        ae.Score = score;
        acousticEvents.Add(ae);
    }

    // now add in extra common info to the acoustic events
    acousticEvents.ForEach(ae =>
    {
        ae.SpeciesName = configDict[AnalysisKeys.SpeciesName];
        ae.SegmentStartSeconds = segmentStartOffset.TotalSeconds;
        ae.SegmentDurationSeconds = recording.Duration.TotalSeconds;
        ae.Name = abbreviatedName;
        ae.BorderColour = Color.Red;
        ae.FileName = recording.BaseName;
    });

    // compress event scores down to the hi-res index time scale for the plot
    double[] scores = new double[rhzColCount]; // predefinition of score array
    double normalisationConstant = scoreThreshold * 4; // four times the score threshold
    double compressionFactor = rhzColCount / (double)sonogram.Data.GetLength(0);
    foreach (AcousticEvent ae in acousticEvents)
    {
        ae.ScoreNormalised = ae.Score / normalisationConstant;
        if (ae.ScoreNormalised > 1.0)
        {
            ae.ScoreNormalised = 1.0;
        }

        int frameId = (int)Math.Round(ae.EventStartSeconds / frameDurationInSeconds);
        int hiresFrameId = (int)Math.Floor(frameId * compressionFactor);
        scores[hiresFrameId] = ae.ScoreNormalised;
    }

    var plot = new Plot(AnalysisName, scores, scoreThreshold);

    // DEBUG ONLY ################################ TEMPORARY ################################
    // Draw a standard spectrogram and mark off hits etc.
    bool createStandardDebugSpectrogram = true;
    var imageDir = new DirectoryInfo(outputDir.FullName + @"\SpectrogramImages");
    if (!imageDir.Exists)
    {
        imageDir.Create();
    }

    if (createStandardDebugSpectrogram)
    {
        // FIX: the original declared an unused duplicate `fileName2` and then used
        // `fileName` anyway; the duplicate has been removed.
        string filePath2 = Path.Combine(imageDir.FullName, fileName + ".Spectrogram.png");
        Bitmap sonoBmp = (Bitmap)sonogram.GetImage();
        int height = sonoBmp.Height;
        foreach (AcousticEvent ae in acousticEvents)
        {
            ae.DrawEvent(sonoBmp);
            ae.DrawPoint(sonoBmp, ae.Points[0], Color.OrangeRed);
            ae.DrawPoint(sonoBmp, ae.Points[1], Color.Yellow);
            ae.DrawPoint(sonoBmp, ae.Points[2], Color.LimeGreen);
        }

        // draw the original hits on the standard sonogram
        foreach (int[] array in newList)
        {
            sonoBmp.SetPixel(array[0], height - array[1], Color.Cyan);
        }

        // mark off every tenth frequency bin on the standard sonogram
        for (int r = 0; r < 20; r++)
        {
            sonoBmp.SetPixel(0, height - (r * 10) - 1, Color.Blue);
            sonoBmp.SetPixel(1, height - (r * 10) - 1, Color.Blue);
        }

        // mark off upper bound and lower frequency bound
        sonoBmp.SetPixel(0, height - dominantBinMin, Color.Lime);
        sonoBmp.SetPixel(0, height - dominantBinMax, Color.Lime);
        sonoBmp.Save(filePath2);
    }

    // END DEBUG ################################ TEMPORARY ################################
    return new LimnodynastesConvexResults
    {
        Sonogram = sonogram,
        Hits = null,
        Plot = plot,
        Events = acousticEvents,
        RecordingDuration = recording.Duration,
    };
} // Analysis()
/// <summary>
/// Builds an amplitude spectrogram, RMS-normalizes it, converts to dB, applies
/// PCA-whitening noise reduction, samples it into row patches, whitens them, and
/// reconstructs the spectrogram — asserting that the reconstruction preserves the
/// original spectrogram dimensions.
/// </summary>
public void TestPcaWhitening()
{
    var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var outputDir = this.outputDirectory;

    // FIX: corrected typo in the output artifact name ("Reconstrcuted" -> "Reconstructed").
    var outputImagePath = Path.Combine(outputDir.FullName, "ReconstructedSpectrogram.png");
    var fst = FreqScaleType.Linear;
    var freqScale = new FrequencyScale(fst);
    var recording = new AudioRecording(recordingPath);
    var sonoConfig = new SonogramConfig
    {
        WindowSize = freqScale.FinalBinCount * 2,
        WindowOverlap = 0.2,
        SourceFName = recording.BaseName,
        NoiseReductionType = NoiseReductionType.None,
        NoiseReductionParameter = 0.0,
    };

    // GENERATE AMPLITUDE SPECTROGRAM
    var spectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader);
    spectrogram.Configuration.WindowSize = freqScale.WindowSize;

    // DO RMS NORMALIZATION
    spectrogram.Data = SNR.RmsNormalization(spectrogram.Data);

    // CONVERT NORMALIZED AMPLITUDE SPECTROGRAM TO dB SPECTROGRAM
    var sonogram = new SpectrogramStandard(spectrogram);

    // DO NOISE REDUCTION
    var dataMatrix = PcaWhitening.NoiseReduction(sonogram.Data);
    sonogram.Data = dataMatrix;

    // Do Patch Sampling: each patch is one full-width spectrogram row.
    int rows = sonogram.Data.GetLength(0);
    int columns = sonogram.Data.GetLength(1);
    int patchWidth = columns;
    int patchHeight = 1;
    int numberOfPatches = (rows / patchHeight) * (columns / patchWidth);
    var sequentialPatches = PatchSampling.GetPatches(sonogram.Data, patchWidth, patchHeight, numberOfPatches, PatchSampling.SamplingMethod.Sequential);
    double[,] sequentialPatchMatrix = sequentialPatches.ToMatrix();

    // DO PCA WHITENING
    var whitenedSpectrogram = PcaWhitening.Whitening(sequentialPatchMatrix);

    // reconstructing the spectrogram from sequential patches and the projection matrix obtained from random patches
    var projectionMatrix = whitenedSpectrogram.ProjectionMatrix;
    var eigenVectors = whitenedSpectrogram.EigenVectors;
    var numComponents = whitenedSpectrogram.Components;
    double[,] reconstructedSpec = PcaWhitening.ReconstructSpectrogram(projectionMatrix, sequentialPatchMatrix, eigenVectors, numComponents);
    sonogram.Data = PatchSampling.ConvertPatches(reconstructedSpec, patchWidth, patchHeight, columns);
    var reconstructedSpecImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + fst, freqScale.GridLineLocations);
    reconstructedSpecImage.Save(outputImagePath, ImageFormat.Png);

    // DO UNIT TESTING: reconstruction must preserve the spectrogram dimensions.
    Assert.AreEqual(spectrogram.Data.GetLength(0), sonogram.Data.GetLength(0));
    Assert.AreEqual(spectrogram.Data.GetLength(1), sonogram.Data.GetLength(1));
}
/// <summary>
/// Command-line entry point: renders several spectrogram variants (amplitude, decibel
/// before/after noise removal, a standard sonogram, and a multi-track sonogram) of one
/// audio recording and stacks them vertically into a single output image file.
/// </summary>
/// <param name="arguments">Provides the source recording, the config file, and the output image path.</param>
public static void Main(Arguments arguments)
{
    // 1. Set up the necessary files.
    //DirectoryInfo diSource = arguments.Source.Directory;
    FileInfo fiSourceRecording = arguments.Source;
    FileInfo fiConfig = arguments.Config.ToFileInfo();
    FileInfo fiImage = arguments.Output.ToFileInfo();
    fiImage.CreateParentDirectories();

    string title = "# CREATE FOUR (4) SONOGRAMS FROM AUDIO RECORDING";
    string date = "# DATE AND TIME: " + DateTime.Now;
    LoggedConsole.WriteLine(title);
    LoggedConsole.WriteLine(date);
    LoggedConsole.WriteLine("# Input audio file: " + fiSourceRecording.Name);
    LoggedConsole.WriteLine("# Output image file: " + fiImage);

    // 2. Get the config dictionary.
    Config configuration = ConfigFile.Deserialize(fiConfig);

    // Below three lines are examples of retrieving info from the Config object.
    //string analysisIdentifier = configuration[AnalysisKeys.AnalysisName];
    //bool saveIntermediateWavFiles = (bool?)configuration[AnalysisKeys.SaveIntermediateWavFiles] ?? false;
    //scoreThreshold = (double?)configuration[AnalysisKeys.EventThreshold] ?? scoreThreshold;

    // 3. Transfer sonogram parameters to a dictionary to be passed around.
    // NOTE(review): configDict is populated below but never read again in this method —
    // presumably kept for a code path that is currently commented out; confirm before removing.
    var configDict = new Dictionary<string, string>();

    // #Resample rate must be 2 X the desired Nyquist. Default is that of recording.
    configDict["ResampleRate"] = (configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? 17640).ToString();
    configDict["FrameLength"] = configuration[AnalysisKeys.FrameLength] ?? "512";
    int frameSize = configuration.GetIntOrNull(AnalysisKeys.FrameLength) ?? 512;

    // #Frame Overlap as fraction: default=0.0
    configDict["FrameOverlap"] = configuration[AnalysisKeys.FrameOverlap] ?? "0.0";
    double windowOverlap = configuration.GetDoubleOrNull(AnalysisKeys.FrameOverlap) ?? 0.0;

    // #MinHz: 500
    // #MaxHz: 3500
    // #NOISE REDUCTION PARAMETERS
    configDict["DoNoiseReduction"] = configuration["DoNoiseReduction"] ?? "true";
    configDict["BgNoiseThreshold"] = configuration["BgNoiseThreshold"] ?? "3.0";

    configDict["ADD_AXES"] = configuration["ADD_AXES"] ?? "true";
    configDict["AddSegmentationTrack"] = configuration["AddSegmentationTrack"] ?? "true";

    // 3: GET RECORDING
    // NOTE(review): both offsets are hard-coded to zero, so the segment-extraction
    // branch below is currently unreachable; it is retained for manual experimentation.
    var startOffsetMins = TimeSpan.Zero;
    var endOffsetMins = TimeSpan.Zero;

    FileInfo fiOutputSegment = fiSourceRecording;
    if (!(startOffsetMins == TimeSpan.Zero && endOffsetMins == TimeSpan.Zero))
    {
        var buffer = new TimeSpan(0, 0, 0);
        fiOutputSegment = new FileInfo(Path.Combine(fiImage.DirectoryName, "tempWavFile.wav"));

        // This method extracts a segment and saves it to disk at the location fiOutputSegment.
        var resampleRate = configuration.GetIntOrNull(AnalysisKeys.ResampleRate) ?? AppConfigHelper.DefaultTargetSampleRate;
        AudioRecording.ExtractSegment(fiSourceRecording, startOffsetMins, endOffsetMins, buffer, resampleRate, fiOutputSegment);
    }

    var recording = new AudioRecording(fiOutputSegment.FullName);

    // EXTRACT ENVELOPE and SPECTROGRAM
    var dspOutput = DSP_Frames.ExtractEnvelopeAndFfts(recording, frameSize, windowOverlap);

    // average absolute value over the minute recording
    ////double[] avAbsolute = dspOutput.Average;

    // (A) ################################## EXTRACT INDICES FROM THE SIGNAL WAVEFORM ##################################
    // var wavDuration = TimeSpan.FromSeconds(recording.WavReader.Time.TotalSeconds);
    // double totalSeconds = wavDuration.TotalSeconds;
    // double[] signalEnvelope = dspOutput.Envelope;
    // double avSignalEnvelope = signalEnvelope.Average();
    // double[] frameEnergy = dspOutput.FrameEnergy;
    // double highAmplIndex = dspOutput.HighAmplitudeCount / totalSeconds;
    // double binWidth = dspOutput.BinWidth;
    // int nyquistBin = dspOutput.NyquistBin;
    // dspOutput.WindowPower,
    // dspOutput.FreqBinWidth
    int nyquistFreq = dspOutput.NyquistFreq;
    double epsilon = recording.Epsilon;

    // i: prepare amplitude spectrogram
    double[,] amplitudeSpectrogramData = dspOutput.AmplitudeSpectrogram; // get amplitude spectrogram.
    var image1 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(amplitudeSpectrogramData));

    // ii: prepare decibel spectrogram prior to noise removal
    double[,] decibelSpectrogramdata = MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, recording.SampleRate, epsilon);
    decibelSpectrogramdata = MatrixTools.NormaliseMatrixValues(decibelSpectrogramdata);
    var image2 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

    // iii: Calculate background noise spectrum in decibels.
    // Calculate a noise value for each freq bin.
    double sdCount = 0.0; // number of SDs above the mean for noise removal
    var decibelProfile = NoiseProfile.CalculateModalNoiseProfile(decibelSpectrogramdata, sdCount);

    // DataTools.writeBarGraph(dBProfile.NoiseMode);

    // iv: Prepare noise-reduced spectrogram.
    decibelSpectrogramdata = SNR.TruncateBgNoiseFromSpectrogram(decibelSpectrogramdata, decibelProfile.NoiseThresholds);

    //double dBThreshold = 1.0; // SPECTRAL dB THRESHOLD for smoothing background
    //decibelSpectrogramdata = SNR.RemoveNeighbourhoodBackgroundNoise(decibelSpectrogramdata, dBThreshold);
    var image3 = ImageTools.DrawReversedMatrix(MatrixTools.MatrixRotate90Anticlockwise(decibelSpectrogramdata));

    // Prepare a new sonogram config and draw a second image going down a different code pathway.
    var config = new SonogramConfig
    {
        MinFreqBand = 0,
        MaxFreqBand = 10000,
        NoiseReductionType = SNR.KeyToNoiseReductionType("Standard"),
        NoiseReductionParameter = 1.0,
        WindowSize = frameSize,
        WindowOverlap = windowOverlap,
    };

    //var mfccConfig = new MfccConfiguration(config);
    // NOTE(review): the six locals below are read from config but never used afterwards;
    // presumably left over from the cepstral-sonogram path that is commented out below — confirm before removing.
    int bandCount = config.mfccConfig.FilterbankCount;
    bool doMelScale = config.mfccConfig.DoMelScale;
    int ccCount = config.mfccConfig.CcCount;
    int fftBins = config.FreqBinCount; //number of Hz bands = 2^N +1 because includes the DC band
    int minHz = config.MinFreqBand ?? 0;
    int maxHz = config.MaxFreqBand ?? nyquistFreq;

    var standardSonogram = new SpectrogramStandard(config, recording.WavReader);
    var image4 = standardSonogram.GetImage();

    // TODO next line crashes - does not produce cepstral sonogram.
    //SpectrogramCepstral cepSng = new SpectrogramCepstral(config, recording.WavReader);
    //Image image5 = cepSng.GetImage();

    //var mti = SpectrogramTools.Sonogram2MultiTrackImage(sonogram, configDict);
    //var image = mti.GetImage();

    //Image image = SpectrogramTools.Matrix2SonogramImage(deciBelSpectrogram, config);
    //Image image = SpectrogramTools.Audio2SonogramImage(FileInfo fiAudio, Dictionary<string, string> configDict);

    // Prepare the multi-track sonogram image (time track, wave envelope, segmentation).
    var protoImage6 = new Image_MultiTrack(standardSonogram.GetImage(doHighlightSubband: false, add1KHzLines: true, doMelScale: false));
    protoImage6.AddTrack(ImageTrack.GetTimeTrack(standardSonogram.Duration, standardSonogram.FramesPerSecond));
    protoImage6.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, protoImage6.SonogramImage.Width));
    protoImage6.AddTrack(ImageTrack.GetSegmentationTrack(standardSonogram));
    var image6 = protoImage6.GetImage();

    var list = new List<Image<Rgb24>>();
    list.Add(image1); // amplitude spectrogram
    list.Add(image2); // decibel spectrogram before noise removal
    list.Add(image3); // decibel spectrogram after noise removal
    list.Add(image4); // second version of noise reduced spectrogram
    //list.Add(image5); // ceptral sonogram
    list.Add(image6.CloneAs<Rgb24>()); // multitrack image

    // Stack all rendered spectrograms into one image and write it to the requested output path.
    Image finalImage = ImageTools.CombineImagesVertically(list);
    finalImage.Save(fiImage.FullName);

    ////2: NOISE REMOVAL
    //double[,] originalSg = sonogram.Data;
    //double[,] mnr = sonogram.Data;
    //mnr = ImageTools.WienerFilter(mnr, 3);

    //double backgroundThreshold = 4.0; //SETS MIN DECIBEL BOUND
    //var output = SNR.NoiseReduce(mnr, NoiseReductionType.STANDARD, backgroundThreshold);

    //double ConfigRange = 70; //sets the the max dB
    //mnr = SNR.SetConfigRange(output.Item1, 0.0, ConfigRange);

    ////3: Spectral tracks sonogram
    //byte[,] binary = MatrixTools.IdentifySpectralRidges(mnr);
    //binary = MatrixTools.ThresholdBinarySpectrum(binary, mnr, 10);
    //binary = MatrixTools.RemoveOrphanOnesInBinaryMatrix(binary);
    ////binary = MatrixTools.PickOutLines(binary); //syntactic approach

    //sonogram.SetBinarySpectrum(binary);
    ////sonogram.Data = SNR.SpectralRidges2Intensity(binary, originalSg);

    //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, false));
    //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration, sonogram.FramesPerSecond));
    //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.sonogramImage.Width));
    //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
    //fn = outputFolder + wavFileName + "_tracks.png";
    //image.Save(fn);
    //LoggedConsole.WriteLine("Spectral tracks sonogram to file: " + fn);

    //3: prepare image of spectral peaks sonogram
    //sonogram.Data = SNR.NoiseReduce_Peaks(originalSg, dynamicRange);
    //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
    //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
    //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
    //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
    //fn = outputFolder + wavFileName + "_peaks.png";
    //image.Save(fn);
    //LoggedConsole.WriteLine("Spectral peaks sonogram to file: " + fn);

    //4: Sobel approach
    //sonogram.Data = SNR.NoiseReduce_Sobel(originalSg, dynamicRange);
    //image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
    //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
    //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
    //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
    //fn = outputFolder + wavFileName + "_sobel.png";
    //image.Save(fn);
    //LoggedConsole.WriteLine("Sobel sonogram to file: " + fn);

    // I1.txt contains the sonogram matrix produced by matlab
    //string matlabFile = @"C:\SensorNetworks\Software\AudioAnalysis\AED\Test\matlab\GParrots_JB2_20090607-173000.wav_minute_3\I1.txt";
    //double[,] matlabMatrix = Util.fileToMatrix(matlabFile, 256, 5166);

    //LoggedConsole.WriteLine(matrix[0, 2] + " vs " + matlabMatrix[254, 0]);
    //LoggedConsole.WriteLine(matrix[0, 3] + " vs " + matlabMatrix[253, 0]);

    // TODO put this back once sonogram issues resolved
    /*
     * LoggedConsole.WriteLine("START: AED");
     * IEnumerable<Oblong> oblongs = AcousticEventDetection.detectEvents(3.0, 100, matrix);
     * LoggedConsole.WriteLine("END: AED");
     *
     *
     * //set up static variables for init Acoustic events
     * //AcousticEvent. doMelScale = config.DoMelScale;
     * AcousticEvent.FreqBinCount = config.FreqBinCount;
     * AcousticEvent.FreqBinWidth = config.FftConfig.NyquistFreq / (double)config.FreqBinCount;
     * // int minF = (int)config.MinFreqBand;
     * // int maxF = (int)config.MaxFreqBand;
     * AcousticEvent.FrameDuration = config.GetFrameOffset();
     *
     *
     * var events = new List<EventPatternRecog.Rectangle>();
     * foreach (Oblong o in oblongs)
     * {
     *     var e = new AcousticEvent(o);
     *     events.Add(new EventPatternRecog.Rectangle(e.StartTime, (double) e.MaxFreq, e.StartTime + e.Duration, (double)e.MinFreq));
     *     //LoggedConsole.WriteLine(e.StartTime + "," + e.Duration + "," + e.MinFreq + "," + e.MaxFreq);
     * }
     *
     * LoggedConsole.WriteLine("# AED events: " + events.Count);
     *
     * LoggedConsole.WriteLine("START: EPR");
     * IEnumerable<EventPatternRecog.Rectangle> eprRects = EventPatternRecog.detectGroundParrots(events);
     * LoggedConsole.WriteLine("END: EPR");
     *
     * var eprEvents = new List<AcousticEvent>();
     * foreach (EventPatternRecog.Rectangle r in eprRects)
     * {
     *     var ae = new AcousticEvent(r.Left, r.Right - r.Left, r.Bottom, r.Top, false);
     *     LoggedConsole.WriteLine(ae.WriteProperties());
     *     eprEvents.Add(ae);
     * }
     *
     * string imagePath = Path.Combine(outputFolder, "RESULTS_" + Path.GetFileNameWithoutExtension(recording.BaseName) + ".png");
     *
     * bool doHighlightSubband = false; bool add1kHzLines = true;
     * var image = new Image_MultiTrack(sonogram.GetImage(doHighlightSubband, add1kHzLines));
     * //image.AddTrack(ImageTrack.GetTimeTrack(sonogram.Duration));
     * //image.AddTrack(ImageTrack.GetWavEnvelopeTrack(recording, image.Image.Width));
     * //image.AddTrack(ImageTrack.GetSegmentationTrack(sonogram));
     * image.AddEvents(eprEvents);
     * image.Save(outputFolder + wavFileName + ".png");
     */

    LoggedConsole.WriteLine("\nFINISHED!");
}
/// <summary>
/// End-to-end feature-learning test: samples random patches from a folder of recordings,
/// learns per-band cluster centroids (PCA whitening + k-means), then transforms a target
/// recording's sequential patches against those centroids, temporally summarizes the
/// features (mean/std/max), and writes one feature CSV per frequency band.
/// Fixes vs previous revision: redundant `? true : false` ternary removed; repeated
/// `list.ToArray()[i]` lookups (an O(n) copy per access) replaced with the List indexer;
/// loop-invariant `ToJagged()`/`ToMatrix()` conversions hoisted out of inner loops;
/// dictionary keys built consistently with string interpolation; dead commented-out
/// code and the unused `outputReSpecImagePath` local removed.
/// </summary>
public void TestFeatureLearning()
{
    // Directory set-up: source recordings plus output artefact paths.
    var resultDir = PathHelper.ResolveAssetPath("FeatureLearning");
    var folderPath = Path.Combine(resultDir, "random_audio_segments");
    var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
    var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
    var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");

    // Check whether there is any file in the folder/subfolders.
    if (Directory.GetFiles(folderPath, "*", SearchOption.AllDirectories).Length == 0)
    {
        throw new ArgumentException("The folder of recordings is empty...");
    }

    // Spectrogram settings; the nyquist value is taken from the first wav file in the folder.
    int nyquist = new AudioRecording(Directory.GetFiles(folderPath, "*.wav")[0]).Nyquist;
    int frameSize = 1024;
    int finalBinCount = 128;
    int hertzInterval = 1000;
    FreqScaleType scaleType = FreqScaleType.Mel;
    var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);
    var fst = freqScale.ScaleType;

    var sonoConfig = new SonogramConfig
    {
        WindowSize = frameSize,

        // overlap chosen so that each 24 frames duration is equal to 1 second
        WindowOverlap = 0.1028,
        DoMelScale = scaleType == FreqScaleType.Mel,
        MelBinCount = scaleType == FreqScaleType.Mel ? finalBinCount : frameSize / 2,
        NoiseReductionType = NoiseReductionType.None,
    };

    // Patch-sampling parameters. minFreqBin/maxFreqBin allow working on an arbitrary
    // frequency-bin band; the defaults would be minFreqBin = 1, maxFreqBin = finalBinCount.
    int minFreqBin = 40;
    int maxFreqBin = 80;
    int numFreqBand = 1;
    int patchWidth = (maxFreqBin - minFreqBin + 1) / numFreqBand;
    int patchHeight = 1;
    int numRandomPatches = 20;

    // One "randomPatch{i}" list per frequency band.
    Dictionary<string, List<double[,]>> randomPatchLists = new Dictionary<string, List<double[,]>>();
    for (int i = 0; i < numFreqBand; i++)
    {
        randomPatchLists.Add($"randomPatch{i}", new List<double[,]>());
    }

    List<double[,]> randomPatches = new List<double[,]>();
    double[,] inputMatrix;

    foreach (string filePath in Directory.GetFiles(folderPath, "*.wav"))
    {
        FileInfo fileInfo = filePath.ToFileInfo();

        // Process the wav file only if it is not empty.
        if (fileInfo.Length != 0)
        {
            var recording = new AudioRecording(filePath);
            sonoConfig.SourceFName = recording.BaseName;
            var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // RMS normalization followed by PCA-whitening noise reduction.
            sonogram.Data = SNR.RmsNormalization(sonogram.Data);
            sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);

            // Use either the full-band spectrogram or the configured arbitrary bin band.
            if (minFreqBin != 1 || maxFreqBin != finalBinCount)
            {
                inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram.Data, minFreqBin, maxFreqBin);
            }
            else
            {
                inputMatrix = sonogram.Data;
            }

            // Split the spectrogram into frequency bands, then sample random patches from
            // each band matrix and add them to the corresponding patch list.
            List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
            for (int count = 0; count < allSubmatrices.Count; count++)
            {
                randomPatchLists[$"randomPatch{count}"].Add(
                    PatchSampling
                        .GetPatches(allSubmatrices[count], patchWidth, patchHeight, numRandomPatches, PatchSampling.SamplingMethod.Random)
                        .ToMatrix());
            }
        }
    }

    // Convert each band's list of random patch matrices into one matrix.
    foreach (string key in randomPatchLists.Keys)
    {
        randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
    }

    int numberOfClusters = 50;
    List<double[][]> allBandsCentroids = new List<double[][]>();
    List<KMeansClusterCollection> allClusteringOutput = new List<KMeansClusterCollection>();

    for (int i = 0; i < randomPatches.Count; i++)
    {
        double[,] patchMatrix = randomPatches[i];

        // Apply PCA whitening, then k-means clustering on the reverted data.
        var whitenedSpectrogram = PcaWhitening.Whitening(true, patchMatrix);
        var clusteringOutput = KmeansClustering.Clustering(whitenedSpectrogram.Reversion, numberOfClusters);

        // Write centroids to a CSV file. Csv.WriteToCsv cannot serialize types like
        // Dictionary<int, double[]>, so the centroid values are converted to a matrix
        // and written with Csv.WriteMatrixToCsv instead.
        string pathToClusterCsvFile = Path.Combine(resultDir, "ClusterCentroids" + i.ToString() + ".csv");
        var clusterCentroids = clusteringOutput.ClusterIdCentroid.Values.ToArray();
        Csv.WriteMatrixToCsv(pathToClusterCsvFile.ToFileInfo(), clusterCentroids.ToMatrix());

        // Sort clusters based on size and write cluster ID and size to a CSV file.
        Dictionary<int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
        int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);
        string pathToClusterSizeCsvFile = Path.Combine(resultDir, "ClusterSize" + i.ToString() + ".csv");
        Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

        // Collect centroids (in cluster-ID order) for the feature-transformation stage.
        List<KeyValuePair<int, double[]>> list = clusteringOutput.ClusterIdCentroid.ToList();
        double[][] centroids = new double[list.Count][];
        for (int j = 0; j < list.Count; j++)
        {
            centroids[j] = list[j].Value;
        }

        allBandsCentroids.Add(centroids);
        allClusteringOutput.Add(clusteringOutput.Clusters);

        // Draw the cluster image: centroids converted to patches in order of cluster size.
        List<double[,]> allCentroids = new List<double[,]>();
        for (int k = 0; k < centroids.Length; k++)
        {
            double[,] cent = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);

            // normalize each centroid, then add a row of zero to separate it from the next
            double[,] normCent = DataTools.normalise(cent);
            double[,] cent2 = PatchSampling.AddRow(normCent);
            allCentroids.Add(cent2);
        }

        // Concatenate all centroids and render them with frequency grid lines.
        double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);
        var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
        clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);

        var outputClusteringImage = Path.Combine(resultDir, "ClustersWithGrid" + i.ToString() + ".bmp");
        FrequencyScale.DrawFrequencyLinesOnImage((Image<Rgb24>)clusterImage, freqScale, includeLabels: false);
        clusterImage.Save(outputClusteringImage);
    }

    // Process the target recording and generate features for it.
    var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var recording2 = new AudioRecording(recording2Path);
    var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader);

    // Draw the mel spectrogram.
    var image = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "MELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
    image.Save(outputMelImagePath);

    // RMS normalization.
    sonogram2.Data = SNR.RmsNormalization(sonogram2.Data);
    var image2 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NORMALISEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
    image2.Save(outputNormMelImagePath);

    // Noise reduction.
    sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data);
    var image3 = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "NOISEREDUCEDMELSPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations);
    image3.Save(outputNoiseReducedMelImagePath);

    // Same frequency-band selection as used for the training recordings above.
    if (minFreqBin != 1 || maxFreqBin != finalBinCount)
    {
        inputMatrix = PatchSampling.GetArbitraryFreqBandMatrix(sonogram2.Data, minFreqBin, maxFreqBin);
    }
    else
    {
        inputMatrix = sonogram2.Data;
    }

    // Extract sequential patches from the target spectrogram, one set per band.
    List<double[,]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(inputMatrix, numFreqBand);
    double[][,] matrices2 = allSubmatrices2.ToArray();
    List<double[,]> allSequentialPatchMatrix = new List<double[,]>();
    for (int i = 0; i < matrices2.GetLength(0); i++)
    {
        int rows = matrices2[i].GetLength(0);
        int columns = matrices2[i].GetLength(1);
        var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
        allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
    }

    // Feature transformation: normalize centroids and sequential patches to unit length,
    // then take the dot product of each patch with the centroids' matrix.
    List<double[][]> allNormCentroids = new List<double[][]>();
    for (int i = 0; i < allBandsCentroids.Count; i++)
    {
        double[][] bandCentroids = allBandsCentroids[i];
        double[][] normCentroids = new double[bandCentroids.GetLength(0)][];
        for (int j = 0; j < bandCentroids.GetLength(0); j++)
        {
            normCentroids[j] = ART_2A.NormaliseVector(bandCentroids[j]);
        }

        allNormCentroids.Add(normCentroids);
    }

    List<double[][]> allFeatureTransVectors = new List<double[][]>();
    for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
    {
        // Loop invariants for this band: the jagged patch rows and the centroid matrix.
        double[][] patchRows = allSequentialPatchMatrix[i].ToJagged();
        var centroidMatrix = allNormCentroids[i].ToMatrix();

        double[][] featureTransVectors = new double[allSequentialPatchMatrix[i].GetLength(0)][];
        for (int j = 0; j < allSequentialPatchMatrix[i].GetLength(0); j++)
        {
            // normalize each patch to unit length
            var normVector = ART_2A.NormaliseVector(patchRows[j]);
            featureTransVectors[j] = centroidMatrix.Dot(normVector);
        }

        allFeatureTransVectors.Add(featureTransVectors);
    }

    // Temporal summarization: for each group of numFrames patches we generate three
    // vectors — mean, std, and max. The pre-assumption is that each input spectrogram
    // is 1 minute long and that each 24 single-frame patches form 1 second.
    List<double[,]> allMeanFeatureVectors = new List<double[,]>();
    List<double[,]> allMaxFeatureVectors = new List<double[,]>();
    List<double[,]> allStdFeatureVectors = new List<double[,]>();

    // NOTE(review): the original comment says "each 24 frames make 1 second", but the
    // `* 60` factor makes each summary window a full minute of frames — confirm intent.
    int numFrames = (24 / patchHeight) * 60;

    foreach (var freqBandFeature in allFeatureTransVectors)
    {
        List<double[]> meanFeatureVectors = new List<double[]>();
        List<double[]> maxFeatureVectors = new List<double[]>();
        List<double[]> stdFeatureVectors = new List<double[]>();
        int c = 0;
        while (c + numFrames < freqBandFeature.GetLength(0))
        {
            // First, make a list of the patches that form one summary window.
            List<double[]> sequencesOfFramesList = new List<double[]>();
            for (int i = c; i < c + numFrames; i++)
            {
                sequencesOfFramesList.Add(freqBandFeature[i]);
            }

            List<double> mean = new List<double>();
            List<double> std = new List<double>();
            List<double> max = new List<double>();
            double[,] sequencesOfFrames = sequencesOfFramesList.ToArray().ToMatrix();

            // Second, calculate mean, max, and standard deviation element-wise (per column).
            for (int j = 0; j < sequencesOfFrames.GetLength(1); j++)
            {
                double[] temp = new double[sequencesOfFrames.GetLength(0)];
                for (int k = 0; k < sequencesOfFrames.GetLength(0); k++)
                {
                    temp[k] = sequencesOfFrames[k, j];
                }

                mean.Add(AutoAndCrossCorrelation.GetAverage(temp));
                std.Add(AutoAndCrossCorrelation.GetStdev(temp));
                max.Add(temp.GetMaxValue());
            }

            meanFeatureVectors.Add(mean.ToArray());
            maxFeatureVectors.Add(max.ToArray());
            stdFeatureVectors.Add(std.ToArray());
            c += numFrames;
        }

        allMeanFeatureVectors.Add(meanFeatureVectors.ToArray().ToMatrix());
        allMaxFeatureVectors.Add(maxFeatureVectors.ToArray().ToMatrix());
        allStdFeatureVectors.Add(stdFeatureVectors.ToArray().ToMatrix());
    }

    // Write features to one CSV file per pre-defined frequency band: for each second,
    // the mean, max, and std vectors are concatenated into a single row.
    for (int j = 0; j < allMeanFeatureVectors.Count; j++)
    {
        var outputFeatureFile = Path.Combine(resultDir, "FeatureVectors" + j.ToString() + ".csv");

        // Loop invariants for this band.
        double[,] meanMatrix = allMeanFeatureVectors[j];
        double[,] maxMatrix = allMaxFeatureVectors[j];
        double[,] stdMatrix = allStdFeatureVectors[j];
        double[][] meanRows = meanMatrix.ToJagged();
        double[][] maxRows = maxMatrix.ToJagged();
        double[][] stdRows = stdMatrix.ToJagged();

        // CSV header: mean0..meanN, max0..maxN, std0..stdN.
        List<string> header = new List<string>();
        for (int i = 0; i < meanMatrix.GetLength(1); i++)
        {
            header.Add("mean" + i.ToString());
        }

        for (int i = 0; i < maxMatrix.GetLength(1); i++)
        {
            header.Add("max" + i.ToString());
        }

        for (int i = 0; i < stdMatrix.GetLength(1); i++)
        {
            header.Add("std" + i.ToString());
        }

        // Concatenate the mean, max, and std vectors for each second.
        List<double[]> featureVectors = new List<double[]>();
        for (int i = 0; i < meanRows.GetLength(0); i++)
        {
            List<double[]> featureList = new List<double[]>
            {
                meanRows[i],
                maxRows[i],
                stdRows[i],
            };
            double[] featureVector = DataTools.ConcatenateVectors(featureList);
            featureVectors.Add(featureVector);
        }

        // Write the header row, then one comma-terminated row per feature vector.
        using (StreamWriter file = new StreamWriter(outputFeatureFile))
        {
            foreach (var entry in header)
            {
                file.Write(entry + ",");
            }

            file.Write(Environment.NewLine);

            foreach (var entry in featureVectors)
            {
                foreach (var value in entry)
                {
                    file.Write(value + ",");
                }

                file.Write(Environment.NewLine);
            }
        }
    }
}
/// <summary>
/// End-to-end test of k-means clustering over spectrogram patches:
/// 1) learns cluster centroids from random patches sampled from a folder of recordings,
/// 2) writes cluster-size CSVs and centroid images per frequency band,
/// 3) reconstructs a target spectrogram from sequential patches using the learned clusters,
/// 4) asserts the reconstruction has the same dimensions as the original spectrogram data.
/// </summary>
public void TestKmeansClustering()
{
    var outputDir = this.outputDirectory;
    var recordingsPath = PathHelper.ResolveAssetPath("FeatureLearning");
    var folderPath = Path.Combine(recordingsPath, "random_audio_segments");

    // NOTE(review): "Reconstrcuted" typo preserved deliberately — renaming the output
    // file could break anything that looks for the existing name.
    var outputImagePath = Path.Combine(outputDir.FullName, "ReconstrcutedSpectrogram.png");

    // check whether there is any file in the folder/subfolders
    if (Directory.GetFiles(folderPath, "*", SearchOption.AllDirectories).Length == 0)
    {
        throw new ArgumentException("The folder of recordings is empty. Test will fail!");
    }

    // Scan for wav files ONCE and validate before indexing. The original code indexed
    // Directory.GetFiles(...)[0] guarded only by the "any file at all" check above, so a
    // folder containing only non-wav files crashed with IndexOutOfRangeException.
    string[] wavFiles = Directory.GetFiles(folderPath, "*.wav");
    if (wavFiles.Length == 0)
    {
        throw new ArgumentException("The folder of recordings contains no wav files. Test will fail!");
    }

    // get the nyquist value from the first wav file in the folder of recordings
    int nyquist = new AudioRecording(wavFiles[0]).Nyquist;
    int frameSize = 1024;
    int finalBinCount = 128;
    int hertzInterval = 1000;
    FreqScaleType scaleType = FreqScaleType.Mel;
    var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval);

    var sonoConfig = new SonogramConfig
    {
        WindowSize = frameSize,

        // WindowOverlap is set based on the fact that each 24 frames is equal to 1 second
        WindowOverlap = 0.1028,
        DoMelScale = scaleType == FreqScaleType.Mel,
        MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2,
        NoiseReductionType = NoiseReductionType.None,
    };

    int numberOfFreqBand = 4;
    int patchWidth = finalBinCount / numberOfFreqBand;
    int patchHeight = 1;
    int numberOfRandomPatches = 20;

    // Define variable number of "randomPatch" lists based on "numberOfFreqBand"
    Dictionary<string, List<double[,]>> randomPatchLists = new Dictionary<string, List<double[,]>>();
    for (int i = 0; i < numberOfFreqBand; i++)
    {
        randomPatchLists.Add($"randomPatch{i}", new List<double[,]>());
    }

    List<double[,]> randomPatches = new List<double[,]>();
    foreach (string filePath in wavFiles)
    {
        FileInfo fileInfo = filePath.ToFileInfo();

        // process the wav file if it is not empty
        if (fileInfo.Length != 0)
        {
            var recording = new AudioRecording(filePath);
            sonoConfig.SourceFName = recording.BaseName;
            var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader);

            // DO RMS NORMALIZATION
            sonogram.Data = SNR.RmsNormalization(sonogram.Data);

            // DO NOISE REDUCTION
            sonogram.Data = PcaWhitening.NoiseReduction(sonogram.Data);

            // creating matrices from different freq bands of the source spectrogram
            List<double[,]> allSubmatrices = PatchSampling.GetFreqBandMatrices(sonogram.Data, numberOfFreqBand);

            // Second: selecting random patches from each freq band matrix and add them to the corresponding patch list.
            // Direct list indexing here — the original called allSubmatrices.ToArray()[count],
            // allocating a fresh array on every iteration for no benefit.
            for (int count = 0; count < allSubmatrices.Count; count++)
            {
                randomPatchLists[$"randomPatch{count}"].Add(
                    PatchSampling.GetPatches(allSubmatrices[count], patchWidth, patchHeight, numberOfRandomPatches, PatchSampling.SamplingMethod.Random)
                        .ToMatrix());
            }
        }
    }

    // convert list of random patches matrices to one matrix per frequency band
    foreach (string key in randomPatchLists.Keys)
    {
        randomPatches.Add(PatchSampling.ListOf2DArrayToOne2DArray(randomPatchLists[key]));
    }

    int numberOfClusters = 32;
    List<KMeansClusterCollection> allClusteringOutput = new List<KMeansClusterCollection>();
    for (int i = 0; i < randomPatches.Count; i++)
    {
        double[,] patchMatrix = randomPatches[i];

        // Do k-means clustering
        var clusteringOutput = KmeansClustering.Clustering(patchMatrix, numberOfClusters);

        // sorting clusters based on size and output it to a csv file
        Dictionary<int, double> clusterIdSize = clusteringOutput.ClusterIdSize;
        int[] sortOrder = KmeansClustering.SortClustersBasedOnSize(clusterIdSize);

        // Write cluster ID and size to a CSV file
        string pathToClusterSizeCsvFile = Path.Combine(outputDir.FullName, "ClusterSize" + i.ToString() + ".csv");
        Csv.WriteToCsv(pathToClusterSizeCsvFile.ToFileInfo(), clusterIdSize);

        // Draw cluster image directly from clustering output
        List<KeyValuePair<int, double[]>> listCluster = clusteringOutput.ClusterIdCentroid.ToList();
        double[][] centroids = new double[listCluster.Count][];
        for (int j = 0; j < listCluster.Count; j++)
        {
            centroids[j] = listCluster[j].Value;
        }

        allClusteringOutput.Add(clusteringOutput.Clusters);

        List<double[,]> allCentroids = new List<double[,]>();
        for (int k = 0; k < centroids.Length; k++)
        {
            // convert each centroid to a matrix in order of cluster ID
            // OR: in order of cluster size
            double[,] centroid = MatrixTools.ArrayToMatrixByColumn(centroids[sortOrder[k]], patchWidth, patchHeight);

            // normalize each centroid
            double[,] normalizedCentroid = DataTools.normalise(centroid);

            // add a row of zero to each centroid
            double[,] newCentroid = PatchSampling.AddRow(normalizedCentroid);
            allCentroids.Add(newCentroid);
        }

        // concatenate all centroids
        double[,] mergedCentroidMatrix = PatchSampling.ListOf2DArrayToOne2DArray(allCentroids);

        // Draw clusters
        var clusterImage = ImageTools.DrawMatrixWithoutNormalisation(mergedCentroidMatrix);
        clusterImage.RotateFlip(RotateFlipType.Rotate270FlipNone);
        var outputClusteringImage = Path.Combine(outputDir.FullName, "ClustersWithGrid" + i.ToString() + ".bmp");
        FrequencyScale.DrawFrequencyLinesOnImage((Bitmap)clusterImage, freqScale, includeLabels: false);
        clusterImage.Save(outputClusteringImage);
    }

    //+++++++++++++++++++++++++++++++++++++++++++Reconstructing a target spectrogram from sequential patches and the cluster centroids
    var recording2Path = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav");
    var recording2 = new AudioRecording(recording2Path);
    var sonogram2 = new SpectrogramStandard(sonoConfig, recording2.WavReader);
    var targetSpec = sonogram2.Data;

    // Do RMS normalization
    sonogram2.Data = SNR.RmsNormalization(sonogram2.Data);

    // NOISE REDUCTION
    sonogram2.Data = PcaWhitening.NoiseReduction(sonogram2.Data);

    // extracting sequential patches from the target spectrogram
    List<double[,]> allSubmatrices2 = PatchSampling.GetFreqBandMatrices(sonogram2.Data, numberOfFreqBand);
    double[][,] matrices2 = allSubmatrices2.ToArray();
    List<double[,]> allSequentialPatchMatrix = new List<double[,]>();
    for (int i = 0; i < matrices2.GetLength(0); i++)
    {
        int rows = matrices2[i].GetLength(0);
        int columns = matrices2[i].GetLength(1);

        // request exactly as many patches as tile the band matrix
        var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
        allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
    }

    // Reconstruct each band from its cluster centroids, then reassemble the full spectrogram
    List<double[,]> convertedSpectrogram = new List<double[,]>();
    int columnPerFreqBand = sonogram2.Data.GetLength(1) / numberOfFreqBand;
    for (int i = 0; i < allSequentialPatchMatrix.Count; i++)
    {
        double[,] reconstructedSpec2 = KmeansClustering.ReconstructSpectrogram(allSequentialPatchMatrix[i], allClusteringOutput[i]);
        convertedSpectrogram.Add(PatchSampling.ConvertPatches(reconstructedSpec2, patchWidth, patchHeight, columnPerFreqBand));
    }

    sonogram2.Data = PatchSampling.ConcatFreqBandMatrices(convertedSpectrogram);

    // DO DRAW SPECTROGRAM
    var reconstructedSpecImage = sonogram2.GetImageFullyAnnotated(sonogram2.GetImage(), "RECONSTRUCTEDSPECTROGRAM: " + freqScale.ScaleType.ToString(), freqScale.GridLineLocations);
    reconstructedSpecImage.Save(outputImagePath, ImageFormat.Png);

    // DO UNIT TESTING: reconstruction must match the original spectrogram's dimensions
    Assert.AreEqual(targetSpec.GetLength(0), sonogram2.Data.GetLength(0));
    Assert.AreEqual(targetSpec.GetLength(1), sonogram2.Data.GetLength(1));
}