예제 #1
0
        /// <summary>
        /// Builds a log-binned spectrogram from the supplied samples and cuts it into spectral images.
        /// </summary>
        /// <param name="audioSamples">Source of the sample buffer and its sample rate.</param>
        /// <param name="configuration">Supplies WdftSize, Overlap, LogBins and the window function.</param>
        /// <returns>
        /// The images returned by CutLogarithmizedSpectrum after being passed through ScaleFullSpectrum,
        /// or an empty list when fewer than one frame can be computed.
        /// </returns>
        public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
        {
            int fftLength = configuration.WdftSize;
            int frameCount = (audioSamples.Samples.Length - fftLength) / configuration.Overlap;

            // Not enough samples for a single frame: nothing to produce.
            if (frameCount < 1)
            {
                return new List<SpectralImage>();
            }

            // Flat output buffer sized for LogBins values per frame.
            float[] spectrum = new float[frameCount * configuration.LogBins];
            ushort[] binBoundaries = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
            float[] windowCoefficients = configuration.Window.GetWindow(fftLength);
            float[] signal = audioSamples.Samples;

            unsafe
            {
                Parallel.For(0, frameCount, frame =>
                {
                    // Each iteration works on its own stack-allocated scratch buffer.
                    float* scratch = stackalloc float[fftLength];
                    int offset = frame * configuration.Overlap;

                    CopyAndWindow(scratch, signal, offset, windowCoefficients);
                    fftServiceUnsafe.FFTForwardInPlace(scratch, fftLength);
                    ExtractLogBins(scratch, binBoundaries, configuration.LogBins, fftLength, spectrum, frame);
                });
            }

            var spectralImages = CutLogarithmizedSpectrum(spectrum, audioSamples.SampleRate, configuration);
            ScaleFullSpectrum(spectralImages, configuration);

            return spectralImages;
        }
예제 #2
0
        /// <summary>
        /// Runs a forward FFT over each overlapping frame of the samples, reduces every frame to
        /// log bins and hands the result to CutLogarithmizedSpectrum.
        /// </summary>
        /// <param name="audioSamples">Source of the sample buffer and its sample rate.</param>
        /// <param name="configuration">Supplies WdftSize, Overlap and LogBins.</param>
        /// <returns>
        /// The list produced by CutLogarithmizedSpectrum, or an empty list when fewer than one
        /// frame can be computed.
        /// </returns>
        public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
        {
            int frameCount = (audioSamples.Samples.Length - configuration.WdftSize) / configuration.Overlap;

            // Not enough samples for a single frame: nothing to produce.
            if (frameCount < 1)
            {
                return new List<SpectralImage>();
            }

            float[][] spectrum = new float[frameCount][];
            int[] binBoundaries = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);

            for (int frame = 0; frame < frameCount; frame++)
            {
                int offset = frame * configuration.Overlap;
                float[] transformed = fftService.FFTForward(audioSamples.Samples, offset, configuration.WdftSize);
                spectrum[frame] = ExtractLogBins(transformed, binBoundaries, configuration.LogBins);
            }

            return CutLogarithmizedSpectrum(spectrum, audioSamples.SampleRate, configuration);
        }
예제 #3
0
        /// <summary>
        /// Computes the log-binned spectrum of every overlapping frame in <paramref name="samples"/>.
        /// </summary>
        /// <param name="samples">
        /// Audio samples. They are normalized in place when configuration.NormalizeSignal is set.
        /// </param>
        /// <param name="configuration">Supplies NormalizeSignal, WdftSize, Overlap and LogBins.</param>
        /// <returns>
        /// One ExtractLogBins result per frame, or an empty array when the input is too short
        /// to hold a single frame.
        /// </returns>
        public float[][] CreateLogSpectrogram(float[] samples, IFingerprintConfiguration configuration)
        {
            if (configuration.NormalizeSignal)
            {
                audioSamplesNormalizer.NormalizeInPlace(samples);
            }

            int width = (samples.Length - configuration.WdftSize) / configuration.Overlap; /*width of the image*/

            // Input shorter than one FFT window can make width negative; allocating an array
            // with a negative length would throw, so report "no frames" instead.
            if (width < 1)
            {
                return new float[0][];
            }

            float[][] frames = new float[width][];
            int[] logFrequenciesIndexes = logUtility.GenerateLogFrequenciesRanges(configuration);
            for (int i = 0; i < width; i++)
            {
                float[] complexSignal = fftService.FFTForward(samples, i * configuration.Overlap, configuration.WdftSize);
                frames[i] = ExtractLogBins(complexSignal, logFrequenciesIndexes, configuration.LogBins);
            }

            return frames;
        }
예제 #4
0
        /// <summary>
        /// Checks that the indexes returned by GenerateLogFrequenciesRanges (fixed log base 10)
        /// equal the spectrum indexes of a reference table of log-spaced frequencies.
        /// </summary>
        public void GenerateLogFrequenciesRangesTest()
        {
            var spectrogramConfig = new DefaultSpectrogramConfig
            {
                UseDynamicLogBase = false,
                LogBase = 10
            };

            // generated in matlab with logspace(2.50242712, 3.3010299957, 33)
            float[] expectedFrequencies =
            {
                318.00f, 336.81f, 356.73f, 377.83f, 400.18f, 423.85f, 448.92f, 475.47f, 503.59f, 533.38f, 564.92f,
                598.34f, 633.73f, 671.21f, 710.91f, 752.96f, 797.50f, 844.67f, 894.63f, 947.54f, 1003.58f, 1062.94f,
                1125.81f, 1192.40f, 1262.93f, 1337.63f, 1416.75f, 1500.54f, 1589.30f, 1683.30f, 1782.86f, 1888.31f,
                2000f
            };

            int[] actualIndexes = logUtility.GenerateLogFrequenciesRanges(defaultFingerprintConfiguration.SampleRate, spectrogramConfig);

            int position = 0;
            foreach (float frequency in expectedFrequencies)
            {
                int expectedIndex = logUtility.FrequencyToSpectrumIndex(
                    frequency,
                    defaultFingerprintConfiguration.SampleRate,
                    spectrogramConfig.WdftSize);

                Assert.AreEqual(expectedIndex, actualIndexes[position]);
                position++;
            }
        }
        /// <summary>
        /// Builds a log-binned spectrogram from the supplied samples using the managed Lomont FFT
        /// and cuts it into spectral images.
        /// </summary>
        /// <remarks>
        /// When configuration.Verbosity is Verbose, a spectrogram PNG and CSV dumps of the frames
        /// and of the spectral images are written to SoundFingerprinter.DEBUG_DIRECTORY_PATH.
        /// The whole call is wrapped in a DebugTimer.
        /// </remarks>
        /// <param name="audioSamples">Source of the sample buffer, its sample rate and its origin name.</param>
        /// <param name="configuration">Supplies WdftSize, Overlap, LogBins, the window function and Verbosity.</param>
        /// <returns>
        /// The images returned by CutLogarithmizedSpectrum after being passed through ScaleFullSpectrum,
        /// or an empty list when fewer than one frame can be computed.
        /// </returns>
        public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
        {
            using (new DebugTimer("CreateLogSpectrogram()"))
            {
                int wdftSize = configuration.WdftSize;
                int width = (audioSamples.Samples.Length - wdftSize) / configuration.Overlap;
                if (width < 1)
                {
                    return new List<SpectralImage>();
                }

                float[] frames = new float[width * configuration.LogBins];
                ushort[] logFrequenciesIndexes = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
                float[] window = configuration.Window.GetWindow(wdftSize);
                float[] samples = audioSamples.Samples;
                bool verbose = configuration.Verbosity == Verbosity.Verbose;

                // PIN: reverted the FFT to lomontFFT with managed code (not the unsafe change made
                // by the original author) due to issues on my computers.
                //
                // NOTE: with Parallel.For the result differs from run to run in Release mode,
                // so the frames are deliberately processed in a plain sequential loop.
                for (int index = 0; index < width; index++)
                {
                    var fftArray = CopyAndWindow(samples, index * configuration.Overlap, window);

                    lomontFFT.RealFFT(fftArray, true);

                    // After the Lomont real FFT the input array holds the FFT values laid out as
                    // r0, r(n/2), r1, i1, r2, i2 ...
                    // ExtractLogBins only uses lowBound indexes above 2, so the "special" first
                    // two values (r0, r(n/2)) can be ignored.
                    // see https://github.com/perivar/FindSimilar/blob/6b658b1c54d1504136e25e933f39b7c303da5d9e/Mirage/Fft.cs
                    ExtractLogBins(fftArray, logFrequenciesIndexes, configuration.LogBins, wdftSize, frames, index);
                }

                if (verbose)
                {
                    var imageService = new FindSimilarImageService();
                    using (Image image = imageService.GetSpectrogramImage(frames, width, configuration.LogBins, width, configuration.LogBins))
                    {
                        // Path.Combine never returns null, so the path can be used directly.
                        image.Save(BuildDebugOutputPath(audioSamples.Origin, "_spectrogram.png"), ImageFormat.Png);
                    }

                    WriteOutputUtils.WriteCSV(frames, BuildDebugOutputPath(audioSamples.Origin, "_frames.csv"));
                }

                var spectralImages = CutLogarithmizedSpectrum(frames, audioSamples.SampleRate, configuration);

                if (verbose && spectralImages.Count > 0)
                {
                    // Collect the raw image buffers (before scaling) for the CSV dump.
                    var spectralImageArray = new float[spectralImages.Count][];
                    for (int i = 0; i < spectralImageArray.Length; i++)
                    {
                        spectralImageArray[i] = spectralImages[i].Image;
                    }

                    WriteOutputUtils.WriteCSV(spectralImageArray, BuildDebugOutputPath(audioSamples.Origin, "_spectral_images.csv"), ";");
                }

                ScaleFullSpectrum(spectralImages, configuration);
                return spectralImages;
            }
        }

        /// <summary>
        /// Builds the path of a debug output file: the debug directory combined with the
        /// extension-less file name of <paramref name="origin"/> followed by <paramref name="suffix"/>.
        /// </summary>
        private static string BuildDebugOutputPath(string origin, string suffix)
        {
            return Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(origin) + suffix);
        }