/// <summary>
/// Runs <c>ScaleSpectrum</c> over every spectral image using the scaling
/// function taken from the configuration.
/// </summary>
/// <param name="spectralImages">Images handed one by one to <c>ScaleSpectrum</c>.</param>
/// <param name="configuration">Supplies the <c>ScalingFunction</c> passed along with each image.</param>
private void ScaleFullSpectrum(IEnumerable<SpectralImage> spectralImages, SpectrogramConfig configuration)
{
    // Parallel.ForEach may process several images concurrently.
    Parallel.ForEach(
        spectralImages,
        spectralImage => ScaleSpectrum(spectralImage, configuration.ScalingFunction));
}
/// <summary>
/// Builds the log spectrogram for the given samples, cuts it into spectral
/// images and scales them.
/// </summary>
/// <param name="audioSamples">Source of the sample buffer and the sample rate.</param>
/// <param name="configuration">Supplies WdftSize, Overlap, LogBins, Window and the settings used by the helpers called here.</param>
/// <returns>
/// The scaled spectral images, or an empty list when
/// <c>(Samples.Length - WdftSize) / Overlap</c> is less than one.
/// </returns>
public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
{
    int wdftSize = configuration.WdftSize;
    int overlap = configuration.Overlap;
    float[] samples = audioSamples.Samples;

    int frameCount = (samples.Length - wdftSize) / overlap;
    if (frameCount < 1)
    {
        return new List<SpectralImage>();
    }

    var logBins = configuration.LogBins;
    float[] frames = new float[frameCount * logBins];
    ushort[] logFrequenciesIndexes = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
    float[] window = configuration.Window.GetWindow(wdftSize);

    unsafe
    {
        Parallel.For(0, frameCount, frameIndex =>
        {
            // Each iteration gets its own stack-allocated scratch buffer of wdftSize floats.
            float* fftBuffer = stackalloc float[wdftSize];
            CopyAndWindow(fftBuffer, samples, frameIndex * overlap, window);
            fftServiceUnsafe.FFTForwardInPlace(fftBuffer, wdftSize);

            // The shared frames buffer and the frame index are handed to ExtractLogBins;
            // presumably it fills only this frame's slice - confirm against ExtractLogBins.
            ExtractLogBins(fftBuffer, logFrequenciesIndexes, logBins, wdftSize, frames, frameIndex);
        });
    }

    var spectralImages = CutLogarithmizedSpectrum(frames, audioSamples.SampleRate, configuration);
    ScaleFullSpectrum(spectralImages, configuration);
    return spectralImages;
}
/// <summary>
/// Produces the log-frequency index ranges, choosing between the dynamic-base
/// and the static generator according to <c>UseDynamicLogBase</c>.
/// </summary>
/// <param name="sampleRate">Sample rate forwarded to the selected generator.</param>
/// <param name="configuration">Configuration forwarded to the selected generator.</param>
/// <returns>The index array returned by the selected generator.</returns>
public int[] GenerateLogFrequenciesRanges(int sampleRate, SpectrogramConfig configuration)
{
    return configuration.UseDynamicLogBase
        ? GenerateLogFrequenciesDynamicBase(sampleRate, configuration)
        : GenerateStaticLogFrequencies(sampleRate, configuration);
}
/// <summary>
/// Creates the service, storing the log utility and preparing a LomontFFT
/// instance initialized for the configured WDFT size.
/// </summary>
/// <param name="configuration">Supplies the <c>WdftSize</c> passed to <c>Initialize</c>.</param>
/// <param name="logUtility">Log utility kept for later use by this service.</param>
internal FindSimilarSpectrumService(SpectrogramConfig configuration, ILogUtility logUtility)
{
    this.logUtility = logUtility;

    // NOTE(review): A and B are LomontFFT settings; presumably they select its
    // scaling/sign convention - confirm against the LomontFFT documentation.
    this.lomontFFT = new Lomont.LomontFFT { A = 1, B = 1 };
    this.lomontFFT.Initialize(configuration.WdftSize);
}
/// <summary>
/// Computes <c>LogBins + 1</c> spectrum indexes using a logarithm base derived
/// from the configured frequency range and bin count.
/// </summary>
/// <param name="sampleRate">Sample rate used to convert the minimum frequency into a spectrum position.</param>
/// <param name="configuration">Supplies FrequencyRange, LogBins and WdftSize.</param>
/// <returns>An array of <c>LogBins + 1</c> indexes.</returns>
private int[] GenerateLogFrequenciesDynamicBase(int sampleRate, SpectrogramConfig configuration)
{
    var binCount = configuration.LogBins;

    // logBase = (Max / Min) ^ (1 / LogBins); the ratio is computed in single precision.
    double logBase = Math.Exp(Math.Log((float)configuration.FrequencyRange.Max / configuration.FrequencyRange.Min) / binCount);

    // Position of the minimum frequency within the WDFT output (fractional).
    double minCoefficient = (float)configuration.WdftSize / sampleRate * configuration.FrequencyRange.Min;

    int[] indexes = new int[binCount + 1];
    int baseIndex = (int)minCoefficient;
    for (int bin = 0; bin < indexes.Length; bin++)
    {
        int offset = (int)((Math.Pow(logBase, bin) - 1.0) * minCoefficient);
        indexes[bin] = offset + baseIndex;
    }

    return indexes;
}
/// <summary>
/// Verifies that a freshly constructed <c>CustomSpectrogramConfig</c> exposes the
/// same values as <c>SpectrogramConfig.Default</c> for the properties checked below.
/// </summary>
public void CustomSpectrumValuesInheritFromDefault()
{
    SpectrogramConfig expected = SpectrogramConfig.Default;
    SpectrogramConfig actual = new CustomSpectrogramConfig();

    Assert.AreEqual(expected.ImageLength, actual.ImageLength);
    Assert.AreEqual(expected.LogBase, actual.LogBase);
    Assert.AreEqual(expected.LogBins, actual.LogBins);
    Assert.AreEqual(expected.FrequencyRange.Max, actual.FrequencyRange.Max);
    Assert.AreEqual(expected.FrequencyRange.Min, actual.FrequencyRange.Min);
    Assert.AreEqual(expected.UseDynamicLogBase, actual.UseDynamicLogBase);
    Assert.AreEqual(expected.WdftSize, actual.WdftSize);
    Assert.AreEqual(expected.Overlap, actual.Overlap);
}
/// <summary>
/// Builds the log spectrogram frame by frame and cuts it into spectral images.
/// </summary>
/// <param name="audioSamples">Source of the sample buffer and the sample rate.</param>
/// <param name="configuration">Supplies WdftSize, Overlap, LogBins and the settings used when cutting images.</param>
/// <returns>
/// The spectral images, or an empty list when
/// <c>(Samples.Length - WdftSize) / Overlap</c> is less than one.
/// </returns>
public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
{
    float[] samples = audioSamples.Samples;
    int wdftSize = configuration.WdftSize;
    int overlap = configuration.Overlap;

    int frameCount = (samples.Length - wdftSize) / overlap;
    if (frameCount < 1)
    {
        return new List<SpectralImage>();
    }

    // NOTE(review): logFrequenciesIndexes is not declared inside this method (a local
    // computation via logUtility.GenerateLogFrequenciesRanges was disabled earlier);
    // it must resolve to a member declared elsewhere - confirm.
    float[][] frames = new float[frameCount][];
    for (int frame = 0; frame < frameCount; frame++)
    {
        float[] spectrum = fftService.FFTForward(samples, frame * overlap, wdftSize);
        frames[frame] = ExtractLogBins(spectrum, logFrequenciesIndexes, configuration.LogBins);
    }

    return CutLogarithmizedSpectrum(frames, audioSamples.SampleRate, configuration);
}
/// <summary>
/// Computes <c>LogBins + 1</c> spectrum indexes for frequencies spaced evenly on
/// a logarithmic scale (base <c>LogBase</c>) between the configured minimum and
/// maximum frequency.
/// </summary>
/// <param name="sampleRate">Sample rate forwarded to <c>FrequencyToSpectrumIndex</c>.</param>
/// <param name="configuration">Supplies FrequencyRange, LogBase, LogBins and WdftSize.</param>
/// <returns>An array of <c>LogBins + 1</c> indexes.</returns>
private int[] GenerateStaticLogFrequencies(int sampleRate, SpectrogramConfig configuration)
{
    var logBase = configuration.LogBase;
    double logMin = Math.Log(configuration.FrequencyRange.Min, logBase);
    double logMax = Math.Log(configuration.FrequencyRange.Max, logBase);
    double step = (logMax - logMin) / configuration.LogBins;

    int[] indexes = new int[configuration.LogBins + 1];

    // The offset is accumulated by repeated addition (not bin * step) so the
    // floating-point results stay exactly as before.
    double accumulatedStep = 0;
    for (int bin = 0; bin < indexes.Length; bin++)
    {
        float frequency = (float)Math.Pow(logBase, logMin + accumulatedStep);
        accumulatedStep += step;

        // Index in the spectrum array from which the summation for this bin starts.
        indexes[bin] = FrequencyToSpectrumIndex(frequency, sampleRate, configuration.WdftSize);
    }

    return indexes;
}
/// <summary>
/// Cuts the log spectrogram into consecutive spectral images of
/// <c>ImageLength</c> frames each, advancing by the configured stride
/// between images.
/// </summary>
/// <param name="logarithmizedSpectrum">Spectrogram as one row of log bins per frame.</param>
/// <param name="sampleRate">Sample rate used to compute each image's timestamp.</param>
/// <param name="configuration">Supplies Stride, Overlap and ImageLength.</param>
/// <returns>
/// The images in order, with sequence numbers starting at 1. The list is empty when
/// the spectrogram has no frames or fewer frames than one image needs.
/// </returns>
protected List<SpectralImage> CutLogarithmizedSpectrum(float[][] logarithmizedSpectrum, int sampleRate, SpectrogramConfig configuration)
{
    var spectralImages = new List<SpectralImage>();

    // An empty spectrogram has no first row to read the bin count from;
    // return no images instead of failing on logarithmizedSpectrum[0].
    if (logarithmizedSpectrum.Length == 0)
    {
        return spectralImages;
    }

    var strideBetweenConsecutiveImages = configuration.Stride;
    int overlap = configuration.Overlap;
    int fingerprintImageLength = configuration.ImageLength;
    int numberOfLogBins = logarithmizedSpectrum[0].Length;
    int width = logarithmizedSpectrum.Length;

    // Strides are divided by the overlap to convert them into frame counts.
    int index = (int)((float)strideBetweenConsecutiveImages.FirstStride / overlap);
    int sequenceNumber = 0;

    while (index + fingerprintImageLength <= width)
    {
        float[][] spectralImage = AllocateMemoryForFingerprintImage(fingerprintImageLength, numberOfLogBins);
        for (int i = 0; i < fingerprintImageLength; i++)
        {
            Array.Copy(logarithmizedSpectrum[index + i], spectralImage[i], numberOfLogBins);
        }

        spectralImages.Add(new SpectralImage
        {
            Image = spectralImage,
            Timestamp = index * ((double)overlap / sampleRate),
            SequenceNumber = ++sequenceNumber
        });

        index += fingerprintImageLength + (int)((float)strideBetweenConsecutiveImages.GetNextStride() / overlap);
    }

    return spectralImages;
}
/// <summary>
/// Public member that hides the inherited <c>CutLogarithmizedSpectrum</c> (via
/// <c>new</c>) and forwards all arguments unchanged to the base implementation.
/// </summary>
/// <param name="logarithmizedSpectrum">Spectrogram forwarded to the base implementation.</param>
/// <param name="sampleRate">Sample rate forwarded to the base implementation.</param>
/// <param name="configuration">Configuration forwarded to the base implementation.</param>
/// <returns>Whatever the base implementation returns.</returns>
public new List<SpectralImage> CutLogarithmizedSpectrum(float[][] logarithmizedSpectrum, int sampleRate, SpectrogramConfig configuration)
{
    List<SpectralImage> images = base.CutLogarithmizedSpectrum(logarithmizedSpectrum, sampleRate, configuration);
    return images;
}
/// <summary>
/// Cuts a flat log spectrogram (frames stored back to back, <c>LogBins</c> values
/// per frame) into spectral images of <c>ImageLength</c> frames each.
/// </summary>
/// <param name="logarithmizedSpectrum">Flat spectrogram buffer; its length divided by LogBins gives the frame count.</param>
/// <param name="sampleRate">Sample rate used to compute each image's start position.</param>
/// <param name="configuration">Supplies Stride, Overlap, LogBins and ImageLength.</param>
/// <returns>The images in order, with sequence numbers starting at 0.</returns>
public List<SpectralImage> CutLogarithmizedSpectrum(float[] logarithmizedSpectrum, int sampleRate, SpectrogramConfig configuration)
{
    var stride = configuration.Stride;
    int overlap = configuration.Overlap;
    int numberOfLogBins = configuration.LogBins;
    ushort imageLength = configuration.ImageLength;

    int frameCount = logarithmizedSpectrum.Length / numberOfLogBins;
    int floatsPerImage = imageLength * numberOfLogBins;
    int bytesPerImage = floatsPerImage * sizeof(float);

    var spectralImages = new List<SpectralImage>();
    uint sequenceNumber = 0;
    int frame = GetFrequencyIndexLocationOfAudioSamples(stride.FirstStride, overlap);

    while (frame + imageLength <= frameCount)
    {
        // Copy imageLength consecutive frames in one block; offsets are in bytes.
        var pixels = new float[floatsPerImage];
        int sourceByteOffset = sizeof(float) * frame * numberOfLogBins;
        Buffer.BlockCopy(logarithmizedSpectrum, sourceByteOffset, pixels, 0, bytesPerImage);

        float startsAt = frame * ((float)overlap / sampleRate);
        spectralImages.Add(new SpectralImage(pixels, imageLength, (ushort)numberOfLogBins, startsAt, sequenceNumber));

        frame += imageLength + GetFrequencyIndexLocationOfAudioSamples(stride.NextStride, overlap);
        sequenceNumber++;
    }

    return spectralImages;
}
/// <summary>
/// Cuts a jagged log spectrogram (one row per frame) into spectral images of
/// <c>ImageLength</c> frames each, copying <c>LogBins</c> values per row.
/// </summary>
/// <param name="logarithmizedSpectrum">Spectrogram as one row of log bins per frame.</param>
/// <param name="sampleRate">Sample rate used to compute each image's start position.</param>
/// <param name="configuration">Supplies Stride, Overlap, LogBins and ImageLength.</param>
/// <returns>The images in order, with sequence numbers starting at 0.</returns>
public List<SpectralImage> CutLogarithmizedSpectrum(float[][] logarithmizedSpectrum, int sampleRate, SpectrogramConfig configuration)
{
    var stride = configuration.Stride;
    int overlap = configuration.Overlap;
    int numberOfLogBins = configuration.LogBins;
    int imageLength = configuration.ImageLength;

    int frameCount = logarithmizedSpectrum.Length;
    int bytesPerRow = numberOfLogBins * sizeof(float);

    var spectralImages = new List<SpectralImage>();
    int sequenceNumber = 0;
    int frame = GetFrequencyIndexLocationOfAudioSamples(stride.FirstStride, overlap);

    while (frame + imageLength <= frameCount)
    {
        float[][] rows = AllocateMemoryForFingerprintImage(imageLength, numberOfLogBins);
        for (int row = 0; row < imageLength; row++)
        {
            Buffer.BlockCopy(logarithmizedSpectrum[frame + row], 0, rows[row], 0, bytesPerRow);
        }

        var startsAt = frame * ((double)overlap / sampleRate);
        spectralImages.Add(new SpectralImage(rows, startsAt, sequenceNumber));

        frame += imageLength + GetFrequencyIndexLocationOfAudioSamples(stride.NextStride, overlap);
        sequenceNumber++;
    }

    return spectralImages;
}
/// <summary>
/// Builds the log spectrogram with the managed LomontFFT, cuts it into spectral
/// images and scales them. When verbosity is <c>Verbose</c>, the intermediate
/// data is also written to the debug directory.
/// </summary>
/// <param name="audioSamples">Source of the sample buffer, the sample rate and the origin used to name debug files.</param>
/// <param name="configuration">Supplies WdftSize, Overlap, LogBins, Window, Verbosity and the settings used by the helpers called here.</param>
/// <returns>
/// The scaled spectral images, or an empty list when
/// <c>(Samples.Length - WdftSize) / Overlap</c> is less than one.
/// </returns>
public List<SpectralImage> CreateLogSpectrogram(AudioSamples audioSamples, SpectrogramConfig configuration)
{
    using (new DebugTimer("CreateLogSpectrogram()"))
    {
        int wdftSize = configuration.WdftSize;
        int width = (audioSamples.Samples.Length - wdftSize) / configuration.Overlap;
        if (width < 1)
        {
            return new List<SpectralImage>();
        }

        float[] frames = new float[width * configuration.LogBins];
        ushort[] logFrequenciesIndexes = logUtility.GenerateLogFrequenciesRanges(audioSamples.SampleRate, configuration);
        float[] window = configuration.Window.GetWindow(wdftSize);
        float[] samples = audioSamples.Samples;

        // PIN: the FFT uses the managed lomontFFT rather than the original author's
        // unsafe implementation, which caused issues on some machines.
        // NOTE: with Parallel.For the result differed from run to run in Release
        // mode, so the frames are deliberately processed in a plain sequential loop.
        for (int index = 0; index < width; index++)
        {
            var fftArray = CopyAndWindow(samples, index * configuration.Overlap, window);
            lomontFFT.RealFFT(fftArray, true);

            // After the Lomont RealFFT the array holds r0, r(n/2), r1, i1, r2, i2, ...
            // ExtractLogBins only uses lower-bound indexes above 2, so the two
            // "special" leading values (r0, r(n/2)) can be ignored.
            // See https://github.com/perivar/FindSimilar/blob/6b658b1c54d1504136e25e933f39b7c303da5d9e/Mirage/Fft.cs
            ExtractLogBins(fftArray, logFrequenciesIndexes, configuration.LogBins, wdftSize, frames, index);
        }

        bool verbose = configuration.Verbosity == Verbosity.Verbose;

        // Base name shared by all debug files for this input.
        string debugName = verbose ? Path.GetFileNameWithoutExtension(audioSamples.Origin) : null;

        if (verbose)
        {
            WriteSpectrogramDebugFiles(frames, width, configuration, debugName);
        }

        var spectralImages = CutLogarithmizedSpectrum(frames, audioSamples.SampleRate, configuration);

        if (verbose)
        {
            WriteSpectralImagesDebugFile(spectralImages, debugName);
        }

        ScaleFullSpectrum(spectralImages, configuration);
        return spectralImages;
    }
}

/// <summary>
/// Writes the full spectrogram as a PNG image and as a CSV file into the debug directory.
/// </summary>
private void WriteSpectrogramDebugFiles(float[] frames, int width, SpectrogramConfig configuration, string debugName)
{
    var imageService = new FindSimilarImageService();
    using (Image image = imageService.GetSpectrogramImage(frames, width, configuration.LogBins, width, configuration.LogBins))
    {
        // Path.Combine never returns null, so the path needs no null check before saving.
        image.Save(Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, debugName + "_spectrogram.png"), ImageFormat.Png);
    }

    WriteOutputUtils.WriteCSV(frames, Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, debugName + "_frames.csv"));
}

/// <summary>
/// Writes the pixel data of all spectral images to one CSV file in the debug
/// directory; nothing is written when there are no images.
/// </summary>
private void WriteSpectralImagesDebugFile(List<SpectralImage> spectralImages, string debugName)
{
    if (spectralImages.Count == 0)
    {
        return;
    }

    var spectralImageArray = new float[spectralImages.Count][];
    for (int i = 0; i < spectralImageArray.Length; i++)
    {
        spectralImageArray[i] = spectralImages[i].Image;
    }

    WriteOutputUtils.WriteCSV(spectralImageArray, Path.Combine(SoundFingerprinter.DEBUG_DIRECTORY_PATH, debugName + "_spectral_images.csv"), ";");
}