/// <summary>
/// Builds a log-binned spectrogram from the supplied audio and returns it as a list of spectral images.
/// </summary>
/// <remarks>
/// For every frame, a window of <c>WdftSize</c> samples starting at <c>frame * Overlap</c> is copied and
/// windowed, transformed with the managed Lomont real FFT and reduced to <c>LogBins</c> values that are
/// written into one flat buffer of <c>frameCount * LogBins</c> floats. The buffer is then cut into spectral
/// images by <c>CutLogarithmizedSpectrum</c> and passed through <c>ScaleFullSpectrum</c>.
/// When the verbosity is <c>Verbose</c>, the spectrogram image, the frame buffer and the cut spectral images
/// are written to <c>SoundFingerprinter.DEBUG_DIRECTORY_PATH</c>, named after <c>audioSamples.Origin</c>.
/// </remarks>
/// <param name="audioSamples">Audio to analyse; its samples, sample rate and origin name are read.</param>
/// <param name="configuration">Spectrogram settings (FFT size, overlap, window, log bins, verbosity).</param>
/// <returns>The scaled spectral images, or an empty list when not even one frame fits into the samples.</returns>
public List <SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
        {
            using (new DebugTimer("CreateLogSpectrogram()"))
            {
                int fftLength  = configuration.WdftSize;
                int hop        = configuration.Overlap;
                int frameCount = (audioSamples.Samples.Length - fftLength) / hop;

                // Not enough samples for a single frame: nothing to compute.
                if (frameCount < 1)
                {
                    return new List <SpectralImage>();
                }

                int      logBins    = configuration.LogBins;
                float[]  frames     = new float[frameCount * logBins];
                ushort[] binIndexes = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
                float[]  window     = configuration.Window.GetWindow(fftLength);
                float[]  samples    = audioSamples.Samples;

                // The FFT deliberately uses the managed lomontFFT implementation (the unsafe variant of the
                // original author caused issues on some machines).
                //
                // NOTE: keep this a plain sequential loop. With Parallel.For the result was observed to differ
                // from run to run in Release mode.
                for (int frame = 0; frame < frameCount; frame++)
                {
                    var spectrum = CopyAndWindow(samples, frame * hop, window);

                    lomontFFT.RealFFT(spectrum, true);

                    // After the Lomont real FFT the array holds r0, r(n/2), r1, i1, r2, i2 ...
                    // The log-bin extraction only uses a low bound index above 2, so the two "special"
                    // leading values r0 and r(n/2) can be ignored.
                    // See https://github.com/perivar/FindSimilar/blob/6b658b1c54d1504136e25e933f39b7c303da5d9e/Mirage/Fft.cs
                    ExtractLogBins(spectrum, binIndexes, logBins, fftLength, frames, frame);
                }

                // Debug output: full spectrogram as PNG plus the raw frame buffer as CSV.
                if (configuration.Verbosity == Verbosity.Verbose)
                {
                    var renderer = new FindSimilarImageService();
                    using (Image spectrogram = renderer.GetSpectrogramImage(frames, frameCount, logBins, frameCount, logBins))
                    {
                        // Path.Combine never returns null, so the path can be used directly.
                        string pngPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(audioSamples.Origin) + "_spectrogram.png");
                        spectrogram.Save(pngPath, ImageFormat.Png);
                    }

                    string framesCsvPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(audioSamples.Origin) + "_frames.csv");
                    WriteOutputUtils.WriteCSV(frames, framesCsvPath);
                }

                var spectralImages = CutLogarithmizedSpectrum(frames, audioSamples.SampleRate, configuration);

                // Debug output: one CSV row set per cut spectral image (before scaling).
                if (configuration.Verbosity == Verbosity.Verbose && spectralImages.Count > 0)
                {
                    float[][] imageData    = spectralImages.ConvertAll <float[]>(spectralImage => spectralImage.Image).ToArray();
                    string    imagesCsvPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(audioSamples.Origin) + "_spectral_images.csv");
                    WriteOutputUtils.WriteCSV(imageData, imagesCsvPath, ";");
                }

                ScaleFullSpectrum(spectralImages, configuration);
                return spectralImages;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the fingerprinting pipeline on the given audio: scale the samples, build the log spectrogram,
        /// derive fingerprints from the spectral images and hash them.
        /// </summary>
        /// <remarks>
        /// Side effect: <c>samples.Samples</c> is reassigned to the scaled (and, if necessary, zero-padded) buffer.
        /// When the spectrogram verbosity is <c>Verbose</c>, intermediate results (audio data, spectral images,
        /// fingerprints and hash bins) are written to <c>SoundFingerprinter.DEBUG_DIRECTORY_PATH</c>, named after
        /// <c>samples.Origin</c>.
        /// </remarks>
        /// <param name="samples">Audio to fingerprint; its sample buffer is replaced by this method.</param>
        /// <param name="configuration">Fingerprint settings, including the nested spectrogram configuration.</param>
        /// <returns>The hashed fingerprints produced by <c>HashFingerprints</c>.</returns>
        public List <HashedFingerprint> CreateFingerprints(AudioSamples samples, FingerprintConfiguration configuration)
        {
            // Scale the samples up before analysis.
            // The Matlab reference multiplies by 2^15 (32768), e.g.
            //   if( max(abs(speech))<=1 ), speech = speech * 2^15; end;
            // but per the original author's note 32768 still made a lot of MFCC feature computations fail,
            // so 65536 is used here.
            const int AUDIO_MULTIPLIER = 65536;

            var spectrogramConfig = configuration.SpectrogramConfig;

            float[] scaled = samples.Samples;
            MathUtils.Multiply(ref scaled, AUDIO_MULTIPLIER);

            // Zero pad if the audio is too short to perform an FFT (Array.Resize fills the new tail with zeros).
            int minimumLength = spectrogramConfig.WdftSize + spectrogramConfig.Overlap;
            if (scaled.Length < minimumLength)
            {
                Array.Resize(ref scaled, minimumLength);
            }
            samples.Samples = scaled;

            if (spectrogramConfig.Verbosity == Verbosity.Verbose)
            {
                string audioCsvPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(samples.Origin) + "_audiodata.csv");
                WriteOutputUtils.WriteCSV(scaled, audioCsvPath);
            }

            // Step 1: log spectrogram, cut into spectral images.
            var spectralImages = spectrumService.CreateLogSpectrogram(samples, spectrogramConfig);

            if (spectrogramConfig.Verbosity == Verbosity.Verbose && spectralImages.Count > 0)
            {
                // Render at most the first five spectral images.
                var renderer = new FindSimilarImageService();
                using (Image preview = renderer.GetLogSpectralImages(spectralImages, Math.Min(spectralImages.Count, 5)))
                {
                    // Path.Combine never returns null, so the path can be used directly.
                    string pngPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(samples.Origin) + "_spectral_images.png");
                    preview.Save(pngPath, ImageFormat.Png);
                }
            }

            // Step 2: fingerprints from the spectral images.
            var fingerprints = CreateFingerprintsFromLogSpectrum(spectralImages, configuration);

            if (spectrogramConfig.Verbosity == Verbosity.Verbose && fingerprints.Count > 0)
            {
                // Render at most the first five fingerprints, using the 128 x 32 arguments of the original call.
                var renderer = new FindSimilarImageService();
                using (Image preview = renderer.GetImageForFingerprints(fingerprints, 128, 32, Math.Min(fingerprints.Count, 5)))
                {
                    string pngPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(samples.Origin) + "_fingerprints.png");
                    preview.Save(pngPath, ImageFormat.Png);
                }
            }

            // Step 3: hash the fingerprints.
            var hashedFingerprints = HashFingerprints(fingerprints, configuration);

            if (spectrogramConfig.Verbosity == Verbosity.Verbose && hashedFingerprints.Count > 0)
            {
                int[][] hashBins        = hashedFingerprints.ConvertAll <int[]>(hashed => hashed.HashBins).ToArray();
                string  hashBinsCsvPath = Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, Path.GetFileNameWithoutExtension(samples.Origin) + "_hashbins.csv");
                WriteOutputUtils.WriteCSV(hashBins, hashBinsCsvPath, ";");
            }

            return hashedFingerprints;
        }