Example #1
 /// <summary>
 /// Creates a new SpectralODF object instance
 /// </summary>
 /// <param name="spectrogram">the spectrogram on which the detection functions operate</param>
 /// <param name="allocator">the memory allocator used for intermediate vectors and matrices</param>
 /// <param name="ratio">calculate the difference to the frame at which the window magnitude first exceeds the given ratio</param>
 /// <param name="frames">calculate the difference to the N-th previous frame (0 derives the value from the ratio)</param>
 public SpectralODF(Spectrogram spectrogram, MemoryAllocator allocator, float ratio=0.22f, int frames=0)
 {
     _s = spectrogram;
     _allocator = allocator;
     //determine the number of diff frames
     if (frames == 0)
     {
         //get the first sample with a higher magnitude than given ratio
         var sample = _s.Window.Find(f => f > ratio).Item1;
         var diff_samples = _s.Window.Count / 2 - sample;
         //convert to frames
         frames = (int)Math.Round(diff_samples / _s.HopSize);
     }
     //set the minimum to 1
     if (frames < 1) frames = 1;
     _diffFrames = frames;
 }
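
The ratio-to-frames conversion above is easier to follow in isolation. The following sketch reproduces just that arithmetic with plain arrays; the 2048-sample Hann window, the 44100 / 200 hop size and the use of Array.FindIndex in place of the vector's Find are illustrative assumptions, not taken from the example.

using System;
using System.Linq;
using MathNet.Numerics;

class DiffFrameSketch
{
    static void Main()
    {
        //illustrative values: 2048-sample Hann window, hop = 44100 / 200 = 220.5 samples
        float[] window = Window.Hann(2048).Select(d => (float)d).ToArray();
        float ratio = 0.22f;
        float hopSize = 44100 / 200f;

        //index of the first window sample whose magnitude exceeds the ratio
        int sample = Array.FindIndex(window, f => f > ratio);
        //distance from that sample to the window centre, in samples...
        float diffSamples = window.Length / 2 - sample;
        //...converted to whole frames, clamped to a minimum of one
        int frames = Math.Max(1, (int)Math.Round(diffSamples / hopSize));

        Console.WriteLine($"first sample above ratio: {sample}, diff frames: {frames}");
    }
}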
Example #2
        /// <summary>
        /// Creates a new Spectrogram object instance and performs an STFT on the given audio
        /// </summary>
        /// <param name="wav">a Wav object containing the audio</param>
        /// <param name="allocator">the memory allocator used for the spectrogram's matrices and vectors</param>
        /// <param name="windowSize">the size of the window in samples</param>
        /// <param name="fps">the desired frame rate</param>
        /// <param name="online">work in online mode (i.e. use only past audio information)</param>
        /// <param name="phase">include phase information</param>
        public Spectrogram(Wav wav, MemoryAllocator allocator, int windowSize=2048, int fps=200, bool online=true, bool phase=true)
        {
            _allocator = allocator;
            //init some variables
            _wav = wav;
            _fps = fps;
            //derive some variables
            HopSize = _wav.Samplerate / (float)_fps; //use floats so that seeking works properly
            _frames = (int)(_wav.Samples / HopSize);
            _ffts = windowSize / 2;
            Bins = windowSize / 2; //initial number equal to ffts, can change if filters are used

            //init STFT matrix
            _STFT = _allocator.GetComplex32Matrix(_frames, _ffts);
            //_STFT = DenseMatrix.Create(_frames, _ffts, Complex32.Zero);

            //grab the first audio channel as a plain array
            var cArray = wav.Audio.ToRowArrays()[0];

            //create the windowing function
            var values = MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray();
            Window = _allocator.GetFloatVector(values.Length);
            Window.SetValues(values);

            //Window = Vector<float>.Build.DenseOfArray(MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray());

            //step through all frames
            System.Numerics.Complex[] result = new System.Numerics.Complex[Window.Count];
            foreach (var frame in Enumerable.Range(0, _frames))
            {
                int seek;
                Vector<float> signal;
                //seek to the right position in the audio signal
                if (online)
                    //step back a complete windowSize after moving forward 1 hopSize
                    //so that the current position is at the end of the window
                    seek = (int)((frame + 1) * HopSize - windowSize);
                else
                    //step back half of the windowSize so that the frame represents the centre of the window
                    seek = (int)(frame * HopSize - windowSize / 2);
                //read in the right portion of the audio
                if (seek >= _wav.Samples)
                    //end of the audio reached
                    break;
                else if (seek + windowSize > _wav.Samples)
                {
                    //window extends past the end of the audio, append zeros accordingly
                    int zeroAmount = seek + windowSize - _wav.Samples;
                    //var zeros = Vector<float>.Build.Dense(zeroAmount, 0);

                    var t = PythonUtilities.Slice<float>(cArray, seek, cArray.Length).ToArray();

                    //t.AddRange(zeros.ToList());

                    signal = _allocator.GetFloatVector(t.Length + zeroAmount);
                    for (int i = 0; i < t.Length; i++)
                    {
                        signal[i] = t[i];
                    }
                    //signal.SetValues(t);
                    //signal = Vector<float>.Build.DenseOfEnumerable(t);
                }
                else if (seek < 0)
                {
                    //window starts before the beginning of the audio, prepend zeros accordingly
                    int zeroAmount = -seek;
                    var zeros = Vector<float>.Build.Dense(zeroAmount, 0).ToList();

                    var t = PythonUtilities.Slice<float>(cArray, 0, seek + windowSize).ToArray();
                    zeros.AddRange(t);

                    signal = _allocator.GetFloatVector(t.Length + zeroAmount);
                    signal.SetValues(zeros.ToArray());
                    //signal = Vector<float>.Build.DenseOfEnumerable(zeros);
                }
                else
                {
                    //normal read operation
                    var slice = PythonUtilities.Slice<float>(cArray, seek, seek + windowSize).ToArray();
                    signal = _allocator.GetFloatVector(slice.Length);
                    signal.SetValues(slice);

                    //signal = Vector<float>.Build.DenseOfEnumerable(PythonUtilities.Slice<float>(cArray, seek, seek + windowSize));
                }
                //multiply the signal with the window function
                signal = signal.PointwiseMultiply(Window);
                //only shift and perform complex DFT if needed
                if (phase)
                {
                    //circular shift the signal (needed for correct phase)
                    signal = NumpyCompatibility.FFTShift(signal);
                }
                //perform DFT
                //sanity check
                Debug.Assert(result.Length == signal.Count);
                for (int i = 0; i < result.Length; i++)
                {
                    result[i] = signal[i];
                }
                MathNet.Numerics.IntegralTransforms.Fourier.BluesteinForward(result, MathNet.Numerics.IntegralTransforms.FourierOptions.NoScaling);
                _STFT.SetRow(frame, result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray());
                //var _newSTFTRow = result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray();
                //_STFT.SetRow(frame, _newSTFTRow);
                //return the pooled signal vector before moving on to the next frame
                _allocator.ReturnFloatVectorStorage((MathNet.Numerics.LinearAlgebra.Storage.DenseVectorStorage<float>)signal.Storage);
            }
            //magnitude (and optionally phase) spectrogram

            Spec = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
            if (phase)
                Phase = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
            for (int i = 0; i < Spec.RowCount; i++)
            {
                for (int j = 0; j < Spec.ColumnCount; j++)
                {
                    Spec.At(i, j, _STFT.At(i, j).Magnitude);
                    if (phase)
                        Phase.At(i, j, _STFT.At(i, j).Phase);
                }
            }
            //Spec = _STFT.Map(c => (float)c.Magnitude);

            //phase
            //if (phase)
            //{
            //    var imag = _STFT.Map(c => (float)c.Imaginary);
            //    var real = _STFT.Map(c => (float)c.Real);
            //    Phase = real.Map2((r, i) => (float)Math.Atan2(i,r), imag);
            //}
        }
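
The seek arithmetic at the top of the frame loop is what distinguishes online from offline mode. The sketch below evaluates both expressions for the first few frames, using illustrative values (windowSize = 2048, hopSize = 44100 / 200 = 220.5) rather than anything read from a real file.

using System;

class SeekSketch
{
    static void Main()
    {
        //illustrative values matching the defaults: windowSize = 2048, fps = 200, 44.1 kHz audio
        int windowSize = 2048;
        float hopSize = 44100 / 200f;

        for (int frame = 0; frame < 3; frame++)
        {
            //online: the window ends at (frame + 1) * hopSize, so only past samples are used
            int onlineSeek = (int)((frame + 1) * hopSize - windowSize);
            //offline: the window is centred on frame * hopSize
            int offlineSeek = (int)(frame * hopSize - windowSize / 2);
            Console.WriteLine($"frame {frame}: online seek {onlineSeek}, offline seek {offlineSeek}");
        }
        //frame 0: online seek -1827, offline seek -1024 (negative seeks are padded with leading zeros)
    }
}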
Example #3
        private void GetOnsets(Wav w, MemoryAllocator allocator)
        {
            //construct the spectrogram
            var s = new Spectrogram(w, allocator, _options.WindowSize, _options.FPS, _options.Online, NeedPhaseInformation(_options.DetectionFunction));

            //perform adaptive whitening
            if (_options.AdaptiveWhitening) s.AW(_options.AWFloor, _options.AWRelax);

            //construct the filterbank
            var filt = new Filter(_options.WindowSize / 2, w.Samplerate, allocator);

            //filter the spectrogram
            s.Filter(filt.Filterbank);

            //take the log of the spectrogram
            if (_options.Log) s.Log(_options.LogMultiplier, _options.LogAdd);

            //calculate the activations
            var sodf = new SpectralODF(s, allocator);
            var act = GetActivations(sodf, _options.DetectionFunction);

            //detect the onsets
            var o = new Onsets(act, _options.FPS);
            o.Detect(_options.ActivationThreshold, _options.MinimumTimeDelta, delay: w.Delay * 1000);
            //onsets that fall within the padded lead-in belong to the previous slice and are skipped below
            var count = o.Detections.Count(f => f < (w.Delay + w.Padding));

            //add the onsets to the collection
            lock (_lock)
            {
                _onsets.AddRange(o.Detections.Skip(count));
                _amplitudes.AddRange(o.Amplitudes.Skip(count));
            }

            //GetOnsets runs in parallel, so update the progress counter atomically
            var completed = System.Threading.Interlocked.Increment(ref _completed);
            ProgressReporter.Report(String.Format("{0}%", Math.Round((float)completed / _sliceCount * 100f)));

            //cleanup
            s.Cleanup();
            filt.Cleanup();
        }
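
The delay/padding bookkeeping at the end of GetOnsets decides which detections belong to the current slice. The sketch below replays that filter on a few made-up onset times; it assumes, as the comparison against w.Delay + w.Padding implies, that Onsets.Detect reports detection times in seconds with the slice delay already applied.

using System;
using System.Collections.Generic;
using System.Linq;

class PaddingSketch
{
    static void Main()
    {
        //illustrative values: a slice that starts 0.5 s into the file with 0.1 s of leading padding
        float delay = 0.5f;    //Wav.Delay, in seconds
        float padding = 0.1f;  //Wav.Padding, in seconds

        //absolute onset times reported for the slice (made-up numbers)
        var detections = new List<float> { 0.52f, 0.61f, 0.75f };

        //onsets inside the padded lead-in were already found by the previous slice, so skip them
        int count = detections.Count(f => f < (delay + padding));  //0.52 < 0.6 -> skip one
        var kept = detections.Skip(count).ToList();                //0.61, 0.75

        Console.WriteLine(string.Join(", ", kept));
    }
}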
Example #4
        public List<Onset> Detect(ISampleSource audio)
        {
            _onsets.Clear();
            _completed = 0;
            _sliceCount = 0;
            _onsets = new List<float>();
            _amplitudes = new List<float>();
            var onsets = new List<Onset>();

            //init detection specific variables
            int sliceSampleSize = (int)Math.Ceiling(_options.SliceLength * audio.WaveFormat.SampleRate); //the size of each slice in samples
            int slicePaddingSize = (int)Math.Ceiling(_options.SlicePaddingLength * audio.WaveFormat.SampleRate);
            _sliceCount = (int)Math.Ceiling((float)audio.Length/audio.WaveFormat.Channels / sliceSampleSize); //the number of slices needed
            var samples = (int)audio.Length / audio.WaveFormat.Channels;

            //init parallel specific variables
            var pOptions = new ParallelOptions();
            if (_options.MaxDegreeOfParallelism != -1) pOptions.MaxDegreeOfParallelism = _options.MaxDegreeOfParallelism;
            ParallelLoopState loopState;

            List<Wav> wavSlices = new List<Wav>();
            for (int i = 0; i < _sliceCount; i++)
            {
                int baseStart = i * sliceSampleSize;
                int adjustedStart = (baseStart - sliceSampleSize > 0) ? baseStart - slicePaddingSize : 0;
                int count = (sliceSampleSize + slicePaddingSize + baseStart > samples) ? samples - adjustedStart : sliceSampleSize + (baseStart - adjustedStart) + slicePaddingSize;
                float delay = (float)adjustedStart / audio.WaveFormat.SampleRate;
                float[] buffer = new float[count * audio.WaveFormat.Channels];
                audio.SetPosition(TimeConverter.SampleSourceTimeConverter.ToTimeSpan(audio.WaveFormat, adjustedStart * audio.WaveFormat.Channels));
                audio.Read(buffer, 0, count * audio.WaveFormat.Channels);
                wavSlices.Add(new Wav(buffer, audio.WaveFormat.SampleRate, count, audio.WaveFormat.Channels) {
                    Delay = delay,
                    Padding = ((delay > 0) ? slicePaddingSize : 0) / (float)audio.WaveFormat.SampleRate
                });
            }

            int bucketSize = 5;
            int bucketcount = (int)Math.Ceiling((double)wavSlices.Count / bucketSize);
            MemoryAllocator _allocator = new MemoryAllocator();

            for (int i = 0; i < bucketcount; i++)
            {
                _allocator.Reset();
                int count = bucketSize;
                if ((i + 1) * bucketSize > wavSlices.Count) count = wavSlices.Count - i * bucketSize;

                if (count < 0) continue;

                List<Wav> parallel = wavSlices.GetRange(i * bucketSize, count);
                var ploopResult = Parallel.ForEach(parallel, pOptions, (w, state) => GetOnsets(w, _allocator));
                if (!ploopResult.IsCompleted) throw new Exception("Parallel onset detection did not complete.");
            }

            onsets = _onsets.Zip(_amplitudes, (onset, amplitude) => new Onset { OnsetTime = onset, OnsetAmplitude = amplitude }).ToList();
            onsets = onsets.OrderBy(f => f.OnsetTime).ToList();

            float prev = 0;
            float combine = 0.03f;
            var ret = new List<Onset>();
            for (int i = 0; i < onsets.Count; i++)
            {
                if (onsets[i].OnsetTime - prev < _options.MinimumTimeDelta / 1000.0f)
                    continue;
                prev = onsets[i].OnsetTime;
                ret.Add(onsets[i]);
            }
            return ret;
        }
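
The slice-boundary expressions near the top of the loop are the densest part of the method. The sketch below evaluates them for a made-up 25-second, 44.1 kHz file with 10-second slices and 1 second of padding; all numbers are illustrative.

using System;

class SliceSketch
{
    static void Main()
    {
        //illustrative values: 25 s of 44.1 kHz audio, 10 s slices, 1 s padding
        int sampleRate = 44100;
        int sliceSampleSize = 10 * sampleRate;   //441000
        int slicePaddingSize = 1 * sampleRate;   //44100
        int samples = 25 * sampleRate;           //1102500
        int sliceCount = (int)Math.Ceiling((float)samples / sliceSampleSize);  //3

        for (int i = 0; i < sliceCount; i++)
        {
            int baseStart = i * sliceSampleSize;
            //a slice only starts one padding length early once baseStart exceeds a full slice,
            //so with these numbers both slice 0 and slice 1 start at sample 0
            int adjustedStart = (baseStart - sliceSampleSize > 0) ? baseStart - slicePaddingSize : 0;
            //read the slice plus leading and trailing padding, clipped to the end of the audio
            int count = (sliceSampleSize + slicePaddingSize + baseStart > samples)
                ? samples - adjustedStart
                : sliceSampleSize + (baseStart - adjustedStart) + slicePaddingSize;
            float delay = (float)adjustedStart / sampleRate;
            Console.WriteLine($"slice {i}: start {adjustedStart}, count {count}, delay {delay:F2} s");
        }
        //slice 0: start 0, count 485100, delay 0.00 s
        //slice 1: start 0, count 926100, delay 0.00 s
        //slice 2: start 837900, count 264600, delay 19.00 s
    }
}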
Example #5
        /// <summary>
        /// Creates a new Spectrogram object instance and performs an STFT on the given audio
        /// </summary>
        /// <param name="wav">a Wav object containing the audio</param>
        /// <param name="allocator">the memory allocator used for the spectrogram's matrices and vectors</param>
        /// <param name="windowSize">the size of the window in samples</param>
        /// <param name="fps">the desired frame rate</param>
        /// <param name="online">work in online mode (i.e. use only past audio information)</param>
        /// <param name="phase">include phase information</param>
        public Spectrogram(Wav wav, MemoryAllocator allocator, int windowSize = 2048, int fps = 200, bool online = true, bool phase = true)
        {
            _allocator = allocator;
            //init some variables
            _wav = wav;
            _fps = fps;
            //derive some variables
            HopSize = _wav.Samplerate / (float)_fps; //use floats so that seeking works properly
            _frames = (int)(_wav.Samples / HopSize);
            _ffts   = windowSize / 2;
            Bins    = windowSize / 2; //initial number equal to ffts, can change if filters are used

            //init STFT matrix
            _STFT = _allocator.GetComplex32Matrix(_frames, _ffts);
            //_STFT = DenseMatrix.Create(_frames, _ffts, Complex32.Zero);

            //grab the first audio channel as a plain array
            var cArray = wav.Audio.ToRowArrays()[0];

            //create the windowing function
            var values = MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray();

            Window = _allocator.GetFloatVector(values.Length);
            Window.SetValues(values);

            //Window = Vector<float>.Build.DenseOfArray(MathNet.Numerics.Window.Hann(windowSize).Select(d => (float)d).ToArray());

            //step through all frames
            System.Numerics.Complex[] result = new System.Numerics.Complex[Window.Count];
            foreach (var frame in Enumerable.Range(0, _frames))
            {
                int            seek;
                Vector <float> signal;
                //seek to the right position in the audio signal
                if (online)
                {
                    //step back a complete windowSize after moving forward 1 hopSize
                    //so that the current position is at the end of the window
                    seek = (int)((frame + 1) * HopSize - windowSize);
                }
                else
                {
                    //step back half of the windowSize so that the frame represents the centre of the window
                    seek = (int)(frame * HopSize - windowSize / 2);
                }
                //read in the right portion of the audio
                if (seek >= _wav.Samples)
                {
                    //end of the audio reached
                    break;
                }
                else if (seek + windowSize > _wav.Samples)
                {
                    //window extends past the end of the audio, append zeros accordingly
                    int zeroAmount = seek + windowSize - _wav.Samples;
                    //var zeros = Vector<float>.Build.Dense(zeroAmount, 0);

                    var t = PythonUtilities.Slice <float>(cArray, seek, cArray.Length).ToArray();

                    //t.AddRange(zeros.ToList());

                    signal = _allocator.GetFloatVector(t.Length + zeroAmount);
                    for (int i = 0; i < t.Length; i++)
                    {
                        signal[i] = t[i];
                    }
                    //signal.SetValues(t);
                    //signal = Vector<float>.Build.DenseOfEnumerable(t);
                }
                else if (seek < 0)
                {
                    //window starts before the beginning of the audio, prepend zeros accordingly
                    int zeroAmount = -seek;
                    var zeros      = Vector <float> .Build.Dense(zeroAmount, 0).ToList();

                    var t = PythonUtilities.Slice <float>(cArray, 0, seek + windowSize).ToArray();
                    zeros.AddRange(t);

                    signal = _allocator.GetFloatVector(t.Length + zeroAmount);
                    signal.SetValues(zeros.ToArray());
                    //signal = Vector<float>.Build.DenseOfEnumerable(zeros);
                }
                else
                {
                    //normal read operation
                    var slice = PythonUtilities.Slice <float>(cArray, seek, seek + windowSize).ToArray();
                    signal = _allocator.GetFloatVector(slice.Length);
                    signal.SetValues(slice);

                    //signal = Vector<float>.Build.DenseOfEnumerable(PythonUtilities.Slice<float>(cArray, seek, seek + windowSize));
                }
                //multiply the signal with the window function
                signal = signal.PointwiseMultiply(Window);
                //only shift and perform complex DFT if needed
                if (phase)
                {
                    //circular shift the signal (needed for correct phase)
                    signal = NumpyCompatibility.FFTShift(signal);
                }
                //perform DFT
                //sanity check
                Debug.Assert(result.Length == signal.Count);
                for (int i = 0; i < result.Length; i++)
                {
                    result[i] = signal[i];
                }
                MathNet.Numerics.IntegralTransforms.Fourier.BluesteinForward(result, MathNet.Numerics.IntegralTransforms.FourierOptions.NoScaling);
                _STFT.SetRow(frame, result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray());
                //var _newSTFTRow = result.Select(r => new Complex32((float)r.Real, (float)r.Imaginary)).Take(_ffts).ToArray();
                //_STFT.SetRow(frame, _newSTFTRow);
                //return the pooled signal vector before moving on to the next frame
                _allocator.ReturnFloatVectorStorage((MathNet.Numerics.LinearAlgebra.Storage.DenseVectorStorage <float>)signal.Storage);
            }
            //magnitude (and optionally phase) spectrogram

            Spec = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
            if (phase)
            {
                Phase = _allocator.GetFloatMatrix(_STFT.RowCount, _STFT.ColumnCount);
            }
            for (int i = 0; i < Spec.RowCount; i++)
            {
                for (int j = 0; j < Spec.ColumnCount; j++)
                {
                    Spec.At(i, j, _STFT.At(i, j).Magnitude);
                    if (phase)
                    {
                        Phase.At(i, j, _STFT.At(i, j).Phase);
                    }
                }
            }
            //Spec = _STFT.Map(c => (float)c.Magnitude);

            //phase
            //if (phase)
            //{
            //    var imag = _STFT.Map(c => (float)c.Imaginary);
            //    var real = _STFT.Map(c => (float)c.Real);
            //    Phase = real.Map2((r, i) => (float)Math.Atan2(i,r), imag);
            //}
        }
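
The FFTShift step is only applied when phase information is requested. The sketch below shows its effect on a tiny array, assuming NumpyCompatibility.FFTShift behaves like numpy.fft.fftshift (a circular rotation by half the length); the inline Skip/Concat here is a stand-in for illustration, not the library's implementation.

using System;
using System.Linq;

class FftShiftSketch
{
    static void Main()
    {
        //a tiny stand-in for one windowed frame
        float[] frame = { 0, 1, 2, 3, 4, 5, 6, 7 };

        //circularly rotate by half the length, i.e. swap the two halves
        int half = frame.Length / 2;
        float[] shifted = frame.Skip(half).Concat(frame.Take(half)).ToArray();

        //shifted = 4, 5, 6, 7, 0, 1, 2, 3: the window centre now sits at index 0,
        //which keeps the per-frame phase estimates consistent
        Console.WriteLine(string.Join(", ", shifted));
    }
}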
Example #6
        public List <Onset> Detect(ISampleSource audio)
        {
            _onsets.Clear();
            _completed  = 0;
            _sliceCount = 0;
            _onsets     = new List <float>();
            _amplitudes = new List <float>();
            var onsets = new List <Onset>();

            //init detection specific variables
            int sliceSampleSize  = (int)Math.Ceiling(_options.SliceLength * audio.WaveFormat.SampleRate); //the size of each slice in samples
            int slicePaddingSize = (int)Math.Ceiling(_options.SlicePaddingLength * audio.WaveFormat.SampleRate);

            _sliceCount = (int)Math.Ceiling((float)audio.Length / audio.WaveFormat.Channels / sliceSampleSize); //the number of slices needed
            var samples = (int)audio.Length / audio.WaveFormat.Channels;

            //init parallel specific variables
            var pOptions = new ParallelOptions();

            if (_options.MaxDegreeOfParallelism != -1)
            {
                pOptions.MaxDegreeOfParallelism = _options.MaxDegreeOfParallelism;
            }
            ParallelLoopState loopState;

            List <Wav> wavSlices = new List <Wav>();

            for (int i = 0; i < _sliceCount; i++)
            {
                int     baseStart     = i * sliceSampleSize;
                int     adjustedStart = (baseStart - sliceSampleSize > 0) ? baseStart - slicePaddingSize : 0;
                int     count         = (sliceSampleSize + slicePaddingSize + baseStart > samples) ? samples - adjustedStart : sliceSampleSize + (baseStart - adjustedStart) + slicePaddingSize;
                float   delay         = (float)adjustedStart / audio.WaveFormat.SampleRate;
                float[] buffer        = new float[count * audio.WaveFormat.Channels];
                audio.SetPosition(TimeConverter.SampleSourceTimeConverter.ToTimeSpan(audio.WaveFormat, adjustedStart * audio.WaveFormat.Channels));
                audio.Read(buffer, 0, count * audio.WaveFormat.Channels);
                wavSlices.Add(new Wav(buffer, audio.WaveFormat.SampleRate, count, audio.WaveFormat.Channels)
                {
                    Delay   = delay,
                    Padding = ((delay > 0) ? slicePaddingSize : 0) / (float)audio.WaveFormat.SampleRate
                });
            }

            int             bucketSize  = 5;
            int             bucketcount = (int)Math.Ceiling((double)wavSlices.Count / bucketSize);
            MemoryAllocator _allocator  = new MemoryAllocator();

            for (int i = 0; i < bucketcount; i++)
            {
                _allocator.Reset();
                int count = bucketSize;
                if ((i + 1) * bucketSize > wavSlices.Count)
                {
                    count = wavSlices.Count - i * bucketSize;
                }

                if (count < 0)
                {
                    continue;
                }

                List <Wav> parallel    = wavSlices.GetRange(i * bucketSize, count);
                var        ploopResult = Parallel.ForEach(parallel, pOptions, (w, state) => GetOnsets(w, _allocator));
                if (!ploopResult.IsCompleted)
                {
                    throw new Exception("Parallel onset detection did not complete.");
                }
            }

            onsets = _onsets.Zip(_amplitudes, (onset, amplitude) => new Onset {
                OnsetTime = onset, OnsetAmplitude = amplitude
            }).ToList();
            onsets = onsets.OrderBy(f => f.OnsetTime).ToList();

            float prev    = 0;
            float combine = 0.03f;
            var   ret     = new List <Onset>();

            for (int i = 0; i < onsets.Count; i++)
            {
                if (onsets[i].OnsetTime - prev < _options.MinimumTimeDelta / 1000.0f)
                {
                    continue;
                }
                prev = onsets[i].OnsetTime;
                ret.Add(onsets[i]);
            }
            return(ret);
        }
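
The loop at the end of Detect drops near-duplicate onsets, which the overlapping slices can otherwise produce. The sketch below replays that filter on a few made-up times, assuming a MinimumTimeDelta of 30 ms; only the division by 1000 in the comparison is taken from the example.

using System;
using System.Collections.Generic;

class DeDuplicateSketch
{
    static void Main()
    {
        //illustrative values: MinimumTimeDelta = 30 ms, onset times in seconds, already sorted
        float minimumTimeDeltaMs = 30f;
        var sorted = new List<float> { 1.000f, 1.010f, 1.200f, 1.215f, 2.500f };

        var kept = new List<float>();
        float prev = 0;
        foreach (var t in sorted)
        {
            //drop onsets closer to the previously kept one than the minimum time delta
            if (t - prev < minimumTimeDeltaMs / 1000f) continue;
            prev = t;
            kept.Add(t);
        }

        Console.WriteLine(string.Join(", ", kept));  //1, 1.2, 2.5
    }
}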