/// <summary>
/// Creates a new ODF object instance.
/// </summary>
/// <param name="spectogram">the spectrogram on which the detection functions operate</param>
/// <param name="allocator">allocator used for pooled vector/matrix storage</param>
/// <param name="ratio">calculate the difference to the frame which has the given magnitude ratio</param>
/// <param name="frames">calculate the difference to the N-th previous frame (0 = derive the distance from <paramref name="ratio"/>)</param>
public SpectralODF(Spectrogram spectogram, MemoryAllocator allocator, float ratio = 0.22f, int frames = 0)
{
    _s = spectogram;
    _allocator = allocator;

    //no explicit frame distance given: derive one from the window function.
    //Take the first window sample whose magnitude exceeds the ratio and
    //convert its distance to the window centre into whole frames.
    if (frames == 0)
    {
        var firstAboveRatio = _s.Window.Find(f => f > ratio).Item1;
        var sampleDistance = _s.Window.Count / 2 - firstAboveRatio;
        frames = (int)Math.Round(sampleDistance / _s.HopSize);
    }

    //always diff against at least the previous frame
    _diffFrames = Math.Max(frames, 1);
}
/// <summary>
/// Creates a new Spectrogram object instance and performs a STFT on the given audio
/// </summary>
/// <param name="wav">a Wav object</param>
/// <param name="allocator">pooled allocator providing the STFT/spectrogram matrices and work vectors</param>
/// <param name="windowSize">is the size for the window in samples</param>
/// <param name="fps">is the desired frame rate</param>
/// <param name="online">work in online mode (i.e. use only past audio information)</param>
/// <param name="phase">include phase information</param>
public Spectrogram(Wav wav, MemoryAllocator allocator, int windowSize=2048, int fps=200, bool online=true, bool phase=true)
{
    _allocator = allocator;
    //init some variables
    _wav = wav;
    _fps = fps;
    //derive some variables
    HopSize = _wav.Samplerate / (float)_fps; //use floats so that seeking works properly
    _frames = (int)(_wav.Samples / HopSize);
    _ffts = windowSize / 2;
    Bins = windowSize / 2; //initial number equal to ffts, can change if filters are used
    //init STFT matrix
    _STFT = _allocator.GetComplex32Matrix(_frames, _ffts);
    //_STFT = DenseMatrix.Create(_frames, _ffts, Complex32.Zero);
    //create windowing function (Hann), converted to float to match the audio data
    var cArray = wav.Audio.ToRowArrays()[0]; //first channel only
    var values = MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray();
    Window = _allocator.GetFloatVector(values.Length);
    Window.SetValues(values);
    //Window = Vector<float>.Build.DenseOfArray(MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray());
    //step through all frames; `result` is reused as the DFT in/out buffer for every frame
    System.Numerics.Complex[] result = new System.Numerics.Complex[Window.Count];
    foreach (var frame in Enumerable.Range(0, _frames))
    {
        int seek;
        Vector<float> signal;
        //seek to the right position in the audio signal
        if (online)
            //step back a complete windowSize after moving forward 1 hopSize
            //so that the current position is at the stop of the window
            seek = (int)((frame + 1) * HopSize - windowSize);
        else
            //step back half of the windowSize so that the frame represents the centre of the window
            seek = (int)(frame * HopSize - windowSize / 2);
        //read in the right portion of the audio
        if (seek >= _wav.Samples)
            //stop of file reached
            break;
        else if (seek + windowSize > _wav.Samples)
        {
            //stop behind the actual audio stop, append zeros accordingly
            int zeroAmount = seek + windowSize - _wav.Samples;
            //var zeros = Vector<float>.Build.Dense(zeroAmount, 0);
            var t = PythonUtilities.Slice<float>(cArray, seek, cArray.Length).ToArray();
            //t.AddRange(zeros.ToList());
            //pooled vector is zero-padded implicitly: only the leading samples are written
            //NOTE(review): assumes GetFloatVector hands out zeroed storage — confirm the pool clears returned vectors
            signal = _allocator.GetFloatVector(t.Length + zeroAmount);
            for (int i = 0; i < t.Length; i++)
            {
                signal[i] = t[i];
            }
            //signal.SetValues(t);
            //signal = Vector<float>.Build.DenseOfEnumerable(t);
        }
        else if (seek < 0)
        {
            //start before actual audio start, pad with zeros accordingly
            int zeroAmount = -seek;
            var zeros = Vector<float>.Build.Dense(zeroAmount, 0).ToList();
            var t = PythonUtilities.Slice<float>(cArray, 0, seek + windowSize).ToArray();
            zeros.AddRange(t);
            signal = _allocator.GetFloatVector(t.Length + zeroAmount);
            signal.SetValues(zeros.ToArray());
            //signal = Vector<float>.Build.DenseOfEnumerable(zeros);
        }
        else
        {
            //normal read operation
            var slice = PythonUtilities.Slice<float>(cArray, seek, seek + windowSize).ToArray();
            signal = _allocator.GetFloatVector(slice.Length);
            signal.SetValues(slice);
            //signal = Vector<float>.Build.DenseOfEnumerable(PythonUtilities.Slice<float>(cArray, seek, seek + windowSize));
        }
        //multiply the signal with the window function
        //NOTE(review): PointwiseMultiply (and FFTShift below) replace `signal` with a new
        //vector, so the storage returned to the allocator at the end of this iteration is
        //not the one obtained from GetFloatVector above — confirm the pool tolerates this
        signal = signal.PointwiseMultiply(Window);
        //only shift and perform complex DFT if needed
        if (phase)
        {
            //circular shift the signal (needed for correct phase)
            signal = NumpyCompatibility.FFTShift(signal);
        }
        //perform DFT
        //sanity check
        Debug.Assert(result.Length == signal.Count);
        //copy the real-valued signal into the complex DFT buffer (imaginary parts = 0)
        for (int i = 0; i < result.Length; i++)
        {
            result[i] = signal[i];
        }
        MathNet.Numerics.IntegralTransforms.Fourier.BluesteinForward(result, MathNet.Numerics.IntegralTransforms.FourierOptions.NoScaling);
        //keep only the first _ffts (= windowSize/2) bins, i.e. the non-redundant half of the spectrum
        _STFT.SetRow(frame, result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray());
        //var _newSTFTRow = result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray();
        //_STFT.SetRow(frame, _newSTFTRow);
        //next frame
        _allocator.ReturnFloatVectorStorage((MathNet.Numerics.LinearAlgebra.Storage.DenseVectorStorage<float>)signal.Storage);
    }
    //magnitude spectrogram (and optionally the phase spectrogram) derived cell-by-cell from the STFT
    Spec = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
    if (phase)
        Phase = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
    for (int i = 0; i < Spec.RowCount; i++)
    {
        for (int j = 0; j < Spec.ColumnCount; j++)
        {
            Spec.At(i, j, _STFT.At(i, j).Magnitude);
            if (phase)
                Phase.At(i, j, _STFT.At(i, j).Phase);
        }
    }
    //Spec = _STFT.Map(c => (float)c.Magnitude);
    //phase
    //if (phase)
    //{
    //    var imag = _STFT.Map(c => (float)c.Imaginary);
    //    var real = _STFT.Map(c => (float)c.Real);
    //    Phase = real.Map2((r, i) => (float)Math.Atan2(i,r), imag);
    //}
}
/// <summary>
/// Detects onsets in a single audio slice and merges them into the shared result lists.
/// Runs concurrently: invoked from Parallel.ForEach in Detect.
/// </summary>
/// <param name="w">the audio slice to analyse (carries its delay/padding within the full signal)</param>
/// <param name="allocator">pooled allocator shared by the spectrogram/filter computations</param>
private void GetOnsets(Wav w, MemoryAllocator allocator)
{
    //construct the spectrogram
    var s = new Spectrogram(w, allocator, _options.WindowSize, _options.FPS, _options.Online, NeedPhaseInformation(_options.DetectionFunction));

    //perform adaptive whitening
    if (_options.AdaptiveWhitening)
        s.AW(_options.AWFloor, _options.AWRelax);

    //construct the filterbank and filter the spectrogram
    var filt = new Filter(_options.WindowSize / 2, w.Samplerate, allocator);
    s.Filter(filt.Filterbank);

    //take the log of the spectrogram
    if (_options.Log)
        s.Log(_options.LogMultiplier, _options.LogAdd);

    //calculate the activations
    var sodf = new SpectralODF(s, allocator);
    var act = GetActivations(sodf, _options.DetectionFunction);

    //detect the onsets (Onsets.Detect expects the delay in milliseconds)
    var o = new Onsets(act, _options.FPS);
    o.Detect(_options.ActivationThreshold, _options.MinimumTimeDelta, delay: w.Delay * 1000);

    //skip detections that fall into the slice's leading delay/padding region —
    //they belong to the previous slice and would otherwise be reported twice
    var count = o.Detections.Count(f => f < (w.Delay + w.Padding));

    //add the onsets to the shared collections
    lock (_lock)
    {
        _onsets.AddRange(o.Detections.Skip(count));
        _amplitudes.AddRange(o.Amplitudes.Skip(count));
    }

    //bug fix: this method runs on multiple threads, so the progress counter must be
    //incremented atomically — the previous plain _completed++ was a data race
    int completed = System.Threading.Interlocked.Increment(ref _completed);
    ProgressReporter.Report(String.Format("{0}%", Math.Round(((float)completed / _sliceCount) * 100f)));

    //cleanup
    s.Cleanup();
    filt.Cleanup();
}
/// <summary>
/// Splits the audio into overlapping slices, detects onsets in each slice in parallel,
/// and returns the combined, time-ordered, de-duplicated list of onsets.
/// </summary>
/// <param name="audio">the audio source to analyse</param>
/// <returns>detected onsets ordered by onset time</returns>
/// <exception cref="InvalidOperationException">when the parallel loop is stopped before completion</exception>
public List<Onset> Detect(ISampleSource audio)
{
    //reset state from any previous run (fresh lists replace the old ones, so no Clear needed)
    _completed = 0;
    _sliceCount = 0;
    _onsets = new List<float>();
    _amplitudes = new List<float>();
    var onsets = new List<Onset>();

    //init detection specific variables
    int sliceSampleSize = (int)Math.Ceiling(_options.SliceLength * audio.WaveFormat.SampleRate); //the size of each slice's sample
    int slicePaddingSize = (int)Math.Ceiling(_options.SlicePaddingLength * audio.WaveFormat.SampleRate);
    _sliceCount = (int)Math.Ceiling((float)audio.Length / audio.WaveFormat.Channels / sliceSampleSize); //the number of slices needed
    var samples = (int)audio.Length / audio.WaveFormat.Channels;

    //init parallel specific variables
    var pOptions = new ParallelOptions();
    if (_options.MaxDegreeOfParallelism != -1)
        pOptions.MaxDegreeOfParallelism = _options.MaxDegreeOfParallelism;

    //cut the audio into slices, each (except the first) preceded by some padding
    List<Wav> wavSlices = new List<Wav>();
    for (int i = 0; i < _sliceCount; i++)
    {
        int baseStart = i * sliceSampleSize;
        //NOTE(review): the guard compares against sliceSampleSize while the offset subtracts
        //slicePaddingSize — this means only slices from the 3rd one onwards get leading
        //padding; confirm this asymmetry is intentional
        int adjustedStart = (baseStart - sliceSampleSize > 0) ? baseStart - slicePaddingSize : 0;
        int count = (sliceSampleSize + slicePaddingSize + baseStart > samples)
            ? samples - adjustedStart
            : sliceSampleSize + (baseStart - adjustedStart) + slicePaddingSize;
        float delay = (float)adjustedStart / audio.WaveFormat.SampleRate;

        float[] buffer = new float[count * audio.WaveFormat.Channels];
        audio.SetPosition(TimeConverter.SampleSourceTimeConverter.ToTimeSpan(audio.WaveFormat, adjustedStart * audio.WaveFormat.Channels));
        audio.Read(buffer, 0, count * audio.WaveFormat.Channels);
        wavSlices.Add(new Wav(buffer, audio.WaveFormat.SampleRate, count, audio.WaveFormat.Channels)
        {
            Delay = delay,
            //bug fix: this division was previously int/int, which truncated any
            //padding shorter than one second to 0 seconds
            Padding = ((delay > 0) ? slicePaddingSize : 0) / (float)audio.WaveFormat.SampleRate
        });
    }

    //process the slices in buckets so the pooled memory can be recycled in between
    int bucketSize = 5;
    int bucketCount = (int)Math.Ceiling((double)wavSlices.Count / bucketSize);
    MemoryAllocator allocator = new MemoryAllocator();
    for (int i = 0; i < bucketCount; i++)
    {
        allocator.Reset();
        int count = bucketSize;
        if ((i + 1) * bucketSize > wavSlices.Count)
            count = wavSlices.Count - i * bucketSize;
        if (count < 0)
            continue;

        List<Wav> bucket = wavSlices.GetRange(i * bucketSize, count);
        var ploopResult = Parallel.ForEach(bucket, pOptions, (w, state) => GetOnsets(w, allocator));
        if (!ploopResult.IsCompleted)
            throw new InvalidOperationException("Parallel onset detection was stopped before all slices completed.");
    }

    //combine onset times and amplitudes, ordered by time
    onsets = _onsets.Zip(_amplitudes, (onset, amplitude) => new Onset { OnsetTime = onset, OnsetAmplitude = amplitude }).ToList();
    onsets = onsets.OrderBy(f => f.OnsetTime).ToList();

    //drop onsets closer to their accepted predecessor than the minimum time delta (given in ms)
    float prev = 0;
    var ret = new List<Onset>();
    for (int i = 0; i < onsets.Count; i++)
    {
        if (onsets[i].OnsetTime - prev < _options.MinimumTimeDelta / 1000.0f)
            continue;
        prev = onsets[i].OnsetTime;
        ret.Add(onsets[i]);
    }
    return ret;
}
/// <summary>
/// Creates a new Spectrogram object instance and performs a STFT on the given audio
/// </summary>
/// <param name="wav">a Wav object</param>
/// <param name="allocator">pooled allocator providing the STFT/spectrogram matrices and work vectors</param>
/// <param name="windowSize">is the size for the window in samples</param>
/// <param name="fps">is the desired frame rate</param>
/// <param name="online">work in online mode (i.e. use only past audio information)</param>
/// <param name="phase">include phase information</param>
public Spectrogram(Wav wav, MemoryAllocator allocator, int windowSize = 2048, int fps = 200, bool online = true, bool phase = true)
{
    _allocator = allocator;
    //init some variables
    _wav = wav;
    _fps = fps;
    //derive some variables
    HopSize = _wav.Samplerate / (float)_fps; //use floats so that seeking works properly
    _frames = (int)(_wav.Samples / HopSize);
    _ffts = windowSize / 2;
    Bins = windowSize / 2; //initial number equal to ffts, can change if filters are used
    //init STFT matrix
    _STFT = _allocator.GetComplex32Matrix(_frames, _ffts);
    //_STFT = DenseMatrix.Create(_frames, _ffts, Complex32.Zero);
    //create windowing function (Hann), converted to float to match the audio data
    var cArray = wav.Audio.ToRowArrays()[0]; //first channel only
    var values = MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray();
    Window = _allocator.GetFloatVector(values.Length);
    Window.SetValues(values);
    //Window = Vector<float>.Build.DenseOfArray(MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray());
    //step through all frames; `result` is reused as the DFT in/out buffer for every frame
    System.Numerics.Complex[] result = new System.Numerics.Complex[Window.Count];
    foreach (var frame in Enumerable.Range(0, _frames))
    {
        int seek;
        Vector<float> signal;
        //seek to the right position in the audio signal
        if (online)
        {
            //step back a complete windowSize after moving forward 1 hopSize
            //so that the current position is at the stop of the window
            seek = (int)((frame + 1) * HopSize - windowSize);
        }
        else
        {
            //step back half of the windowSize so that the frame represents the centre of the window
            seek = (int)(frame * HopSize - windowSize / 2);
        }
        //read in the right portion of the audio
        if (seek >= _wav.Samples)
        {
            //stop of file reached
            break;
        }
        else if (seek + windowSize > _wav.Samples)
        {
            //stop behind the actual audio stop, append zeros accordingly
            int zeroAmount = seek + windowSize - _wav.Samples;
            //var zeros = Vector<float>.Build.Dense(zeroAmount, 0);
            var t = PythonUtilities.Slice<float>(cArray, seek, cArray.Length).ToArray();
            //t.AddRange(zeros.ToList());
            //pooled vector is zero-padded implicitly: only the leading samples are written
            //NOTE(review): assumes GetFloatVector hands out zeroed storage — confirm the pool clears returned vectors
            signal = _allocator.GetFloatVector(t.Length + zeroAmount);
            for (int i = 0; i < t.Length; i++)
            {
                signal[i] = t[i];
            }
            //signal.SetValues(t);
            //signal = Vector<float>.Build.DenseOfEnumerable(t);
        }
        else if (seek < 0)
        {
            //start before actual audio start, pad with zeros accordingly
            int zeroAmount = -seek;
            var zeros = Vector<float>.Build.Dense(zeroAmount, 0).ToList();
            var t = PythonUtilities.Slice<float>(cArray, 0, seek + windowSize).ToArray();
            zeros.AddRange(t);
            signal = _allocator.GetFloatVector(t.Length + zeroAmount);
            signal.SetValues(zeros.ToArray());
            //signal = Vector<float>.Build.DenseOfEnumerable(zeros);
        }
        else
        {
            //normal read operation
            var slice = PythonUtilities.Slice<float>(cArray, seek, seek + windowSize).ToArray();
            signal = _allocator.GetFloatVector(slice.Length);
            signal.SetValues(slice);
            //signal = Vector<float>.Build.DenseOfEnumerable(PythonUtilities.Slice<float>(cArray, seek, seek + windowSize));
        }
        //multiply the signal with the window function
        //NOTE(review): PointwiseMultiply (and FFTShift below) replace `signal` with a new
        //vector, so the storage returned to the allocator at the end of this iteration is
        //not the one obtained from GetFloatVector above — confirm the pool tolerates this
        signal = signal.PointwiseMultiply(Window);
        //only shift and perform complex DFT if needed
        if (phase)
        {
            //circular shift the signal (needed for correct phase)
            signal = NumpyCompatibility.FFTShift(signal);
        }
        //perform DFT
        //sanity check
        Debug.Assert(result.Length == signal.Count);
        //copy the real-valued signal into the complex DFT buffer (imaginary parts = 0)
        for (int i = 0; i < result.Length; i++)
        {
            result[i] = signal[i];
        }
        MathNet.Numerics.IntegralTransforms.Fourier.BluesteinForward(result, MathNet.Numerics.IntegralTransforms.FourierOptions.NoScaling);
        //keep only the first _ffts (= windowSize/2) bins, i.e. the non-redundant half of the spectrum
        _STFT.SetRow(frame, result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray());
        //var _newSTFTRow = result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray();
        //_STFT.SetRow(frame, _newSTFTRow);
        //next frame
        _allocator.ReturnFloatVectorStorage((MathNet.Numerics.LinearAlgebra.Storage.DenseVectorStorage<float>)signal.Storage);
    }
    //magnitude spectrogram (and optionally the phase spectrogram) derived cell-by-cell from the STFT
    Spec = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
    if (phase)
    {
        Phase = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
    }
    for (int i = 0; i < Spec.RowCount; i++)
    {
        for (int j = 0; j < Spec.ColumnCount; j++)
        {
            Spec.At(i, j, _STFT.At(i, j).Magnitude);
            if (phase)
            {
                Phase.At(i, j, _STFT.At(i, j).Phase);
            }
        }
    }
    //Spec = _STFT.Map(c => (float)c.Magnitude);
    //phase
    //if (phase)
    //{
    //    var imag = _STFT.Map(c => (float)c.Imaginary);
    //    var real = _STFT.Map(c => (float)c.Real);
    //    Phase = real.Map2((r, i) => (float)Math.Atan2(i,r), imag);
    //}
}
/// <summary>
/// Splits the audio into overlapping slices, detects onsets in each slice in parallel,
/// and returns the combined, time-ordered, de-duplicated list of onsets.
/// </summary>
/// <param name="audio">the audio source to analyse</param>
/// <returns>detected onsets ordered by onset time</returns>
/// <exception cref="InvalidOperationException">when the parallel loop is stopped before completion</exception>
public List<Onset> Detect(ISampleSource audio)
{
    //reset state from any previous run (fresh lists replace the old ones, so no Clear needed)
    _completed = 0;
    _sliceCount = 0;
    _onsets = new List<float>();
    _amplitudes = new List<float>();
    var onsets = new List<Onset>();

    //init detection specific variables
    int sliceSampleSize = (int)Math.Ceiling(_options.SliceLength * audio.WaveFormat.SampleRate); //the size of each slice's sample
    int slicePaddingSize = (int)Math.Ceiling(_options.SlicePaddingLength * audio.WaveFormat.SampleRate);
    _sliceCount = (int)Math.Ceiling((float)audio.Length / audio.WaveFormat.Channels / sliceSampleSize); //the number of slices needed
    var samples = (int)audio.Length / audio.WaveFormat.Channels;

    //init parallel specific variables
    var pOptions = new ParallelOptions();
    if (_options.MaxDegreeOfParallelism != -1)
    {
        pOptions.MaxDegreeOfParallelism = _options.MaxDegreeOfParallelism;
    }

    //cut the audio into slices, each (except the first) preceded by some padding
    List<Wav> wavSlices = new List<Wav>();
    for (int i = 0; i < _sliceCount; i++)
    {
        int baseStart = i * sliceSampleSize;
        //NOTE(review): the guard compares against sliceSampleSize while the offset subtracts
        //slicePaddingSize — this means only slices from the 3rd one onwards get leading
        //padding; confirm this asymmetry is intentional
        int adjustedStart = (baseStart - sliceSampleSize > 0) ? baseStart - slicePaddingSize : 0;
        int count = (sliceSampleSize + slicePaddingSize + baseStart > samples)
            ? samples - adjustedStart
            : sliceSampleSize + (baseStart - adjustedStart) + slicePaddingSize;
        float delay = (float)adjustedStart / audio.WaveFormat.SampleRate;

        float[] buffer = new float[count * audio.WaveFormat.Channels];
        audio.SetPosition(TimeConverter.SampleSourceTimeConverter.ToTimeSpan(audio.WaveFormat, adjustedStart * audio.WaveFormat.Channels));
        audio.Read(buffer, 0, count * audio.WaveFormat.Channels);
        wavSlices.Add(new Wav(buffer, audio.WaveFormat.SampleRate, count, audio.WaveFormat.Channels)
        {
            Delay = delay,
            //bug fix: this division was previously int/int, which truncated any
            //padding shorter than one second to 0 seconds
            Padding = ((delay > 0) ? slicePaddingSize : 0) / (float)audio.WaveFormat.SampleRate
        });
    }

    //process the slices in buckets so the pooled memory can be recycled in between
    int bucketSize = 5;
    int bucketCount = (int)Math.Ceiling((double)wavSlices.Count / bucketSize);
    MemoryAllocator allocator = new MemoryAllocator();
    for (int i = 0; i < bucketCount; i++)
    {
        allocator.Reset();
        int count = bucketSize;
        if ((i + 1) * bucketSize > wavSlices.Count)
        {
            count = wavSlices.Count - i * bucketSize;
        }
        if (count < 0)
        {
            continue;
        }

        List<Wav> bucket = wavSlices.GetRange(i * bucketSize, count);
        var ploopResult = Parallel.ForEach(bucket, pOptions, (w, state) => GetOnsets(w, allocator));
        if (!ploopResult.IsCompleted)
        {
            throw new InvalidOperationException("Parallel onset detection was stopped before all slices completed.");
        }
    }

    //combine onset times and amplitudes, ordered by time
    onsets = _onsets.Zip(_amplitudes, (onset, amplitude) => new Onset { OnsetTime = onset, OnsetAmplitude = amplitude }).ToList();
    onsets = onsets.OrderBy(f => f.OnsetTime).ToList();

    //drop onsets closer to their accepted predecessor than the minimum time delta (given in ms)
    float prev = 0;
    var ret = new List<Onset>();
    for (int i = 0; i < onsets.Count; i++)
    {
        if (onsets[i].OnsetTime - prev < _options.MinimumTimeDelta / 1000.0f)
        {
            continue;
        }
        prev = onsets[i].OnsetTime;
        ret.Add(onsets[i]);
    }
    return ret;
}