protected IAudioStream PrepareStream(IAudioStream stream) {
    if (stream.Properties.Channels > 1) {
        stream = new MonoStream(stream);
    }
    if (stream.Properties.SampleRate != FrameReader.SAMPLERATE) {
        stream = new ResamplingStream(stream, ResamplingQuality.Medium, FrameReader.SAMPLERATE);
    }
    return stream;
}
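// A minimal usage sketch (the file name is an assumption): PrepareStream normalizes
// arbitrary input to mono audio at FrameReader.SAMPLERATE, so code consuming the
// returned stream can rely on a fixed channel count and sample rate.
IAudioStream raw = AudioStreamFactory.FromFileInfoIeee32(new FileInfo("track.wav"));
IAudioStream prepared = PrepareStream(raw);
Debug.Assert(prepared.Properties.Channels == 1);
Debug.Assert(prepared.Properties.SampleRate == FrameReader.SAMPLERATE);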
public void Generate() {
    IAudioStream audioStream = inputTrack.File
        ? AudioStreamFactory.FromFileInfoIeee32(inputTrack.FileInfo)
        : inputTrack.Stream;
    audioStream = new MonoStream(audioStream);
    audioStream = new ResamplingStream(audioStream, ResamplingQuality.Medium, profile.SampleRate);

    STFT stft = new STFT(audioStream, profile.FrameSize, profile.FrameStep, WindowType.Hann, STFT.OutputFormat.Decibel, this.bufferSize);
    index = 0;
    indices = stft.WindowCount;
    frameBuffer = new float[profile.FrameSize / 2];
    List<SubFingerprint> subFingerprints = new List<SubFingerprint>();

    while (stft.HasNext()) {
        // Get FFT spectrum
        stft.ReadFrame(frameBuffer);

        // Sum FFT bins into target frequency bands
        profile.MapFrequencies(frameBuffer, bands);

        CalculateSubFingerprint(bandsPrev, bands, subFingerprints);

        CommonUtil.Swap<float[]>(ref bands, ref bandsPrev);
        index++;

        // Output subfingerprints every once in a while
        if (index % this.eventInterval == 0 && SubFingerprintsGenerated != null) {
            SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(inputTrack, subFingerprints, index, indices));
            subFingerprints.Clear();
        }
    }

    // Output remaining subfingerprints
    if (SubFingerprintsGenerated != null) {
        SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(inputTrack, subFingerprints, index, indices));
    }
    if (Completed != null) {
        Completed(this, EventArgs.Empty);
    }

    audioStream.Close();
}
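// A consumer sketch (the generator variable and the event-args property names are
// assumptions based on the constructor calls above): subscribers must copy each batch
// immediately, because the generator clears and reuses the list after every event.
generator.SubFingerprintsGenerated += (sender, e) => {
    allSubFingerprints.AddRange(e.SubFingerprints); // copy before the list is cleared
    Console.WriteLine("progress: {0}/{1} frames", e.Index, e.Indices);
};
generator.Completed += (sender, e) => Console.WriteLine("fingerprinting completed");
generator.Generate();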
public static IAudioStream PrepareStream(IAudioStream stream, int sampleRate) {
    if (stream.Properties.Format != AudioFormat.IEEE) {
        stream = new IeeeStream(stream);
    }
    if (stream.Properties.Channels > 1) {
        stream = new MonoStream(stream);
    }
    // Compare against the target rate (the original compared against a hard-coded 11050,
    // which skipped resampling only for that one rate regardless of the sampleRate argument)
    if (stream.Properties.SampleRate != sampleRate) {
        stream = new ResamplingStream(stream, ResamplingQuality.Medium, sampleRate);
    }
    return stream;
}
static void Process(Dictionary<string, double> mapping, DirectoryInfo indir, DirectoryInfo outdir) {
    Dictionary<FileInfo, double> fileMapping = new Dictionary<FileInfo, double>();
    foreach (string fileNamePattern in mapping.Keys) {
        double factor = mapping[fileNamePattern];
        foreach (FileInfo fileInfo in indir.EnumerateFiles(fileNamePattern)) {
            fileMapping.Add(fileInfo, factor);
        }
    }

    Parallel.ForEach<FileInfo>(fileMapping.Keys, (fileInfo) => {
        double factor = fileMapping[fileInfo];
        FileInfo outputFileInfo = new FileInfo(Path.Combine(outdir.FullName, fileInfo.Name));

        if (outputFileInfo.Exists) {
            Console.WriteLine(fileInfo.Name + " SKIP (file already exists)");
            return;
        }

        Console.WriteLine(fileInfo.Name);

        try {
            IAudioStream inputStream = AudioStreamFactory.FromFileInfoIeee32(fileInfo);
            IAudioStream resamplingStream = new ResamplingStream(inputStream, ResamplingQuality.VeryHigh, factor);
            // Reset the nominal sample rate to the input rate so the resampled audio
            // plays back time-stretched instead of at the original speed
            MixerStream sampleRateResetStream = new MixerStream(resamplingStream.Properties.Channels, inputStream.Properties.SampleRate);
            sampleRateResetStream.Add(resamplingStream);
            IAudioStream outputStream = sampleRateResetStream;
            AudioStreamFactory.WriteToFile(outputStream, outputFileInfo.FullName);
        } catch (Exception e) {
            Console.WriteLine("Error processing " + fileInfo.Name + ": " + e.Message);
        }
    });
}
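// A usage sketch with hypothetical patterns, factors, and paths: Process matches each
// pattern against the input directory and resamples every hit by its factor. Because
// the MixerStream above resets the nominal sample rate, the output plays back
// time-stretched rather than pitch-corrected.
var mapping = new Dictionary<string, double> {
    { "*_fast.wav", 24d / 25d },
    { "*_slow.wav", 25d / 24d }
};
Process(mapping, new DirectoryInfo(@"C:\audio\in"), new DirectoryInfo(@"C:\audio\out"));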
private void button1_Click(object sender, RoutedEventArgs e) {
    Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
    dlg.DefaultExt = ".wav";
    dlg.Filter = "Wave files|*.wav";

    if (dlg.ShowDialog() == true) {
        WaveFileReader reader = new WaveFileReader(dlg.FileName);
        NAudioSourceStream nAudioSource = new NAudioSourceStream(reader);
        IeeeStream ieee = new IeeeStream(nAudioSource);
        MonoStream mono = new MonoStream(ieee);
        ResamplingStream res = new ResamplingStream(mono, ResamplingQuality.Medium, 22050);
        NAudioSinkStream nAudioSink = new NAudioSinkStream(res);
        WaveFileWriter.CreateWaveFile(dlg.FileName + ".processed.wav", nAudioSink);
    }
}
public void Generate(AudioTrack track) {
    IAudioStream audioStream = new ResamplingStream(
        new MonoStream(AudioStreamFactory.FromFileInfoIeee32(track.FileInfo)),
        ResamplingQuality.Medium, profile.SamplingRate);

    STFT stft = new STFT(audioStream, profile.WindowSize, profile.HopSize, WindowType.Hann, STFT.OutputFormat.Decibel);
    int index = 0;
    int indices = stft.WindowCount;
    int processedFrames = 0;

    float[] spectrum = new float[profile.WindowSize / 2];
    float[] smoothedSpectrum = new float[spectrum.Length - profile.SpectrumSmoothingLength + 1]; // the smoothed frequency spectrum of the current frame
    var spectrumSmoother = new SimpleMovingAverage(profile.SpectrumSmoothingLength);
    float[] spectrumTemporalAverage = new float[spectrum.Length]; // a running average of each spectrum bin over time
    float[] spectrumResidual = new float[spectrum.Length]; // the difference between the current spectrum and the moving average spectrum

    var peakHistory = new PeakHistory(1 + profile.TargetZoneDistance + profile.TargetZoneLength, spectrum.Length / 2);
    var peakPairs = new List<PeakPair>(profile.PeaksPerFrame * profile.PeakFanout); // keep a single instance of the list to avoid instantiation overhead
    var subFingerprints = new List<SubFingerprint>();

    while (stft.HasNext()) {
        // Get the FFT spectrum
        stft.ReadFrame(spectrum);

        // Skip frames whose average spectrum volume is below the threshold.
        // This skips silent frames (zero samples) that only contain very low noise from the FFT
        // and that would screw up the temporal spectrum average below for the following frames.
        if (spectrum.Average() < spectrumMinThreshold) {
            index++;
            continue;
        }

        // Smooth the frequency spectrum to remove small peaks
        if (profile.SpectrumSmoothingLength > 0) {
            spectrumSmoother.Clear();
            for (int i = 0; i < spectrum.Length; i++) {
                var avg = spectrumSmoother.Add(spectrum[i]);
                if (i >= profile.SpectrumSmoothingLength) {
                    smoothedSpectrum[i - profile.SpectrumSmoothingLength] = avg;
                }
            }
        }

        // Update the temporal moving bin average
        if (processedFrames == 0) {
            // Init averages on first frame
            for (int i = 0; i < spectrum.Length; i++) {
                spectrumTemporalAverage[i] = spectrum[i];
            }
        } else {
            // Update averages on all subsequent frames
            for (int i = 0; i < spectrum.Length; i++) {
                spectrumTemporalAverage[i] = ExponentialMovingAverage.UpdateMovingAverage(
                    spectrumTemporalAverage[i], profile.SpectrumTemporalSmoothingCoefficient, spectrum[i]);
            }
        }

        // Calculate the residual.
        // The residual is the difference of the current spectrum to the temporal average spectrum. The higher
        // a bin residual is, the steeper the increase in energy in that peak.
        for (int i = 0; i < spectrum.Length; i++) {
            spectrumResidual[i] = spectrum[i] - spectrumTemporalAverage[i] - 90f;
        }

        // Find local peaks in the residual.
        // The advantage of finding peaks in the residual instead of the spectrum is that spectrum energy is usually
        // concentrated in the low frequencies, resulting in a clustering of the highest peaks in the lows. Getting
        // peaks from the residual distributes the peaks more evenly across the spectrum.
        var peaks = peakHistory.List;             // take oldest list,
        peaks.Clear();                            // clear it, and
        FindLocalMaxima(spectrumResidual, peaks); // refill with new peaks

        // Pick the largest n peaks
        int numMaxima = Math.Min(peaks.Count, profile.PeaksPerFrame);
        if (numMaxima > 0) {
            peaks.Sort((p1, p2) => p1.Value == p2.Value ? 0 : p1.Value < p2.Value ? 1 : -1); // order peaks by height
            if (peaks.Count > numMaxima) {
                peaks.RemoveRange(numMaxima, peaks.Count - numMaxima); // select the n tallest peaks by deleting the rest
            }
            peaks.Sort((p1, p2) => p1.Index == p2.Index ? 0 : p1.Index < p2.Index ? -1 : 1); // sort peaks by index (not really necessary)
        }
        peakHistory.Add(index, peaks);

        if (FrameProcessed != null) {
            // Mark peaks as 0dB for spectrogram display purposes
            foreach (var peak in peaks) {
                spectrum[peak.Index] = 0;
                spectrumResidual[peak.Index] = 0;
            }
            FrameProcessed(this, new FrameProcessedEventArgs {
                AudioTrack = track, Index = index, Indices = indices,
                Spectrum = spectrum, SpectrumResidual = spectrumResidual
            });
        }

        processedFrames++;
        index++;

        if (processedFrames >= peakHistory.Length) {
            peakPairs.Clear();
            FindPairsWithMaxEnergy(peakHistory, peakPairs);
            ConvertPairsToSubFingerprints(peakPairs, subFingerprints);
        }
        if (subFingerprints.Count > 512) {
            FireFingerprintHashesGenerated(track, indices, subFingerprints);
            subFingerprints.Clear();
        }
    }

    // Flush the remaining peaks of the last frames from the history to get all remaining pairs
    for (int i = 0; i < profile.TargetZoneLength; i++) {
        var peaks = peakHistory.List;
        peaks.Clear();
        peakHistory.Add(-1, peaks);
        peakPairs.Clear();
        FindPairsWithMaxEnergy(peakHistory, peakPairs);
        ConvertPairsToSubFingerprints(peakPairs, subFingerprints);
    }
    FireFingerprintHashesGenerated(track, indices, subFingerprints);

    audioStream.Close();
}
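// A minimal sketch (an assumption, not the library source) of what
// ExponentialMovingAverage.UpdateMovingAverage is expected to compute in the temporal
// averaging step above: a standard exponential moving average, where alpha is the
// smoothing coefficient.
static float UpdateMovingAverageSketch(float average, float alpha, float value) {
    // new average = (1 - alpha) * old average + alpha * new value
    return average + alpha * (value - average);
}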
/// <summary>
/// This method generates hash codes from an audio stream in a streaming fashion,
/// which means that it only maintains a small constant-size state and can process
/// streams of arbitrary length.
///
/// Here is a scheme of the data processing flow. After the subband splitting
/// stage, every subband is processed independently.
///
///                  +-----------------+   +--------------+   +-----------+
///  audio stream +---> mono conversion +---> downsampling +---> whitening |
///                  +-----------------+   +--------------+   +---------+-+
///                                                                     |
///  +-------------------+   +------------------+                       |
///  | subband splitting <---+ subband analysis <-----------------------+
///  +--+---+---+---+----+   +------------------+
///     |   |   |   |
///     |   v   v   v
///     |  ... ... ...
///     |
///     |   +------------------+   +-----------------+
///     +---> RMS downsampling +---> onset detection |
///         +------------------+   +----------+------+
///                                           |
///                +-----------------+        |
///  hash codes <--+ hash generation <--------+
///                +-----------------+
///
/// The hash codes from the hash generators of each band are then sent through a
/// sorter which brings them into sequential temporal order before they are stored
/// in the final list.
/// </summary>
/// <param name="track">the audio track to generate hash codes from</param>
public void Generate(AudioTrack track) {
    IAudioStream audioStream = new ResamplingStream(
        new MonoStream(AudioStreamFactory.FromFileInfoIeee32(track.FileInfo)),
        ResamplingQuality.Medium, profile.SamplingRate);
    var whiteningStream = new WhiteningStream(audioStream,
        profile.WhiteningNumPoles, profile.WhiteningDecaySecs, profile.WhiteningBlockLength);
    var subbandAnalyzer = new SubbandAnalyzer(whiteningStream);

    float[] analyzedFrame = new float[profile.SubBands];

    var bandAnalyzers = new BandAnalyzer[profile.SubBands];
    for (int i = 0; i < profile.SubBands; i++) {
        bandAnalyzers[i] = new BandAnalyzer(profile, i);
    }

    List<SubFingerprint> hashes = new List<SubFingerprint>();
    HashTimeSorter hashSorter = new HashTimeSorter(profile.SubBands);

    var sw = new Stopwatch();
    sw.Start();

    int totalFrames = subbandAnalyzer.WindowCount;
    int currentFrame = 0;

    while (subbandAnalyzer.HasNext()) {
        subbandAnalyzer.ReadFrame(analyzedFrame);

        for (int i = 0; i < profile.SubBands; i++) {
            bandAnalyzers[i].ProcessSample(analyzedFrame[i], hashSorter.Queues[i]);
        }

        if (currentFrame % 4096 == 0) {
            hashSorter.Fill(hashes, false);
            if (SubFingerprintsGenerated != null) {
                SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(track, hashes, currentFrame, totalFrames));
                hashes.Clear();
            }
        }

        currentFrame++;
    }

    for (int i = 0; i < bandAnalyzers.Length; i++) {
        bandAnalyzers[i].Flush(hashSorter.Queues[i]);
    }
    hashSorter.Fill(hashes, true);
    if (SubFingerprintsGenerated != null) {
        SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(track, hashes, currentFrame, totalFrames));
        hashes.Clear();
    }

    sw.Stop();
    audioStream.Close();
    Console.WriteLine("time: " + sw.Elapsed);
}
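// A minimal sketch (an assumption, not the library's HashTimeSorter) of the merge step
// the summary above describes: each band emits hashes in temporal order, so a k-way
// merge by frame index restores a single, sequentially ordered stream.
static IEnumerable<(int frame, uint hash)> MergeByTime(List<Queue<(int frame, uint hash)>> queues) {
    while (true) {
        Queue<(int frame, uint hash)> next = null;
        foreach (var q in queues) {
            if (q.Count > 0 && (next == null || q.Peek().frame < next.Peek().frame)) {
                next = q; // pick the queue whose head is earliest in time
            }
        }
        if (next == null) yield break; // all queues drained
        yield return next.Dequeue();
    }
}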
public float Run() {
    IAudioStream audioStream = new ResamplingStream(
        new MonoStream(AudioStreamFactory.FromFileInfoIeee32(audioTrack.FileInfo)),
        ResamplingQuality.Medium, 11000);

    ContinuousFrequencyActivationQuantifier cfaq = new ContinuousFrequencyActivationQuantifier(audioStream);
    float[] cfaValue = new float[1];
    float[] cfaValues = new float[cfaq.WindowCount];
    Label[] cfaLabels = new Label[cfaq.WindowCount];
    int count = 0;
    int musicCount = 0;

    while (cfaq.HasNext()) {
        cfaq.ReadFrame(cfaValue);
        cfaValues[count] = cfaValue[0];
        if (cfaValue[0] > threshold) {
            musicCount++;
            cfaLabels[count] = Label.MUSIC;
        }
        Console.WriteLine("cfa {0,3}% {3} {1,5:0.00} {2}",
            (int)(Math.Round((float)count++ / cfaq.WindowCount * 100)),
            cfaValue[0],
            cfaValue[0] > threshold ? "MUSIC" : "",
            TimeUtil.BytesToTimeSpan(audioStream.Position, audioStream.Properties));
    }
    audioStream.Close();

    if (smoothing) {
        // 3.3 Smoothing
        /* majority filtering with sliding window ~5 secs
         * 1 frame = ~2.4 secs, at least 3 frames are needed for majority filtering
         * -> 3 * ~2.4 secs = ~7.2 secs */

        // filter out single NO_MUSIC frames
        for (int i = 2; i < cfaLabels.Length; i++) {
            if (cfaLabels[i - 2] == Label.MUSIC && cfaLabels[i - 1] == Label.NO_MUSIC && cfaLabels[i] == Label.MUSIC) {
                cfaLabels[i - 1] = Label.MUSIC;
            }
        }
        // filter out single MUSIC frames
        for (int i = 2; i < cfaLabels.Length; i++) {
            if (cfaLabels[i - 2] == Label.NO_MUSIC && cfaLabels[i - 1] == Label.MUSIC && cfaLabels[i] == Label.NO_MUSIC) {
                cfaLabels[i - 1] = Label.NO_MUSIC;
            }
        }
        // swap ~5 secs NO_MUSIC segments to MUSIC
        for (int i = 3; i < cfaLabels.Length; i++) {
            if (cfaLabels[i - 3] == Label.MUSIC && cfaLabels[i - 2] == Label.NO_MUSIC && cfaLabels[i - 1] == Label.NO_MUSIC && cfaLabels[i] == Label.MUSIC) {
                cfaLabels[i - 1] = Label.MUSIC;
                cfaLabels[i - 2] = Label.MUSIC;
            }
        }
        // swap ~5 secs MUSIC segments to NO_MUSIC
        for (int i = 3; i < cfaLabels.Length; i++) {
            if (cfaLabels[i - 3] == Label.NO_MUSIC && cfaLabels[i - 2] == Label.MUSIC && cfaLabels[i - 1] == Label.MUSIC && cfaLabels[i] == Label.NO_MUSIC) {
                cfaLabels[i - 1] = Label.NO_MUSIC;
                cfaLabels[i - 2] = Label.NO_MUSIC;
            }
        }
    }

    float musicRatio = (float)musicCount / count;
    float musicRatioSmoothed = -1f;
    Console.WriteLine("'" + audioTrack.FileInfo.FullName + "' contains " + ((int)(Math.Round(musicRatio * 100))) + "% music");

    if (smoothing) {
        musicCount = cfaLabels.Count<Label>(l => l == Label.MUSIC);
        musicRatioSmoothed = (float)musicCount / count;
        Console.WriteLine("smoothed: " + ((int)(Math.Round(musicRatioSmoothed * 100))) + "% music");
    }

    if (writeLog) {
        FileInfo logFile = new FileInfo(audioTrack.FileInfo.FullName + ".music");
        StreamWriter writer = logFile.CreateText();
        writer.WriteLine(musicRatio + "; " + musicRatioSmoothed);
        writer.WriteLine(threshold);
        for (int i = 0; i < cfaValues.Length; i++) {
            writer.WriteLine("{0:0.00000}; {1}; \t{2}", cfaValues[i], cfaValues[i] > threshold ? Label.MUSIC : Label.NO_MUSIC, cfaLabels[i]);
        }
        writer.Flush();
        writer.Close();
    }

    return 0;
}
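// A generalized sketch (not part of the original code) of the majority filtering that
// the hard-coded loops above implement: relabel each frame to the majority label
// within a sliding window over the unmodified input. With window = 3 this reproduces
// the two "filter out single frames" passes; the ~5 sec segment swaps are a related
// but hand-tuned variant.
static void MajorityFilter(Label[] labels, int window) {
    var source = (Label[])labels.Clone(); // vote on the original labels, not partial results
    int half = window / 2;
    for (int i = half; i < labels.Length - half; i++) {
        int musicVotes = 0;
        for (int j = i - half; j <= i + half; j++) {
            if (source[j] == Label.MUSIC) {
                musicVotes++;
            }
        }
        labels[i] = musicVotes > half ? Label.MUSIC : Label.NO_MUSIC;
    }
}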
public void MyTestInitialize() {
    stream = new ResamplingStream(
        new NullStream(new AudioProperties(1, 44100, 32, AudioFormat.IEEE), 1000),
        ResamplingQuality.VeryHigh);
}
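// A test sketch building on the fixture above. The TargetSampleRate property is an
// assumption implied by the rate-less ResamplingStream constructor used in the setup;
// the NullStream source makes the read deterministic.
[TestMethod]
public void ReadAfterTargetRateChange() {
    stream.TargetSampleRate = 22050; // assumed property; downsample 44100 Hz by factor 2
    var buffer = new byte[stream.SampleBlockSize * 256];
    int bytesRead = stream.Read(buffer, 0, buffer.Length);
    Assert.IsTrue(bytesRead > 0);
}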
private void AddTrack(AudioTrack audioTrack) {
    if (audioTrack.SourceProperties.SampleRate > audioMixer.SampleRate) {
        // The newly added track has a higher samplerate than the current tracks, so we adjust
        // the processing samplerate to the highest rate
        ChangeMixingSampleRate(audioTrack.SourceProperties.SampleRate);
    }

    IAudioStream input = audioTrack.CreateAudioStream();
    IAudioStream baseStream = new TolerantStream(new BufferedStream(input, 1024 * 256 * input.SampleBlockSize, true));
    OffsetStream offsetStream = new OffsetStream(baseStream) {
        Offset = TimeUtil.TimeSpanToBytes(audioTrack.Offset, baseStream.Properties)
    };

    audioTrack.OffsetChanged += new EventHandler<ValueEventArgs<TimeSpan>>(
        delegate(object sender, ValueEventArgs<TimeSpan> e) {
            offsetStream.Offset = TimeUtil.TimeSpanToBytes(e.Value, offsetStream.Properties);
            audioMixer.UpdateLength();
        });

    // Upmix mono inputs to dual channel stereo or downmix surround to allow channel balancing
    // TODO add better multichannel stream support and allow balancing of surround
    IAudioStream mixToStereoStream = offsetStream;
    if (mixToStereoStream.Properties.Channels == 1) {
        mixToStereoStream = new MonoStream(mixToStereoStream, 2);
    } else if (mixToStereoStream.Properties.Channels > 2) {
        mixToStereoStream = new SurroundDownmixStream(mixToStereoStream);
    }

    // control the track phase
    PhaseInversionStream phaseInversion = new PhaseInversionStream(mixToStereoStream) {
        Invert = audioTrack.InvertedPhase
    };

    MonoStream monoStream = new MonoStream(phaseInversion, phaseInversion.Properties.Channels) {
        Downmix = audioTrack.MonoDownmix
    };

    // necessary to control each track individually
    VolumeControlStream volumeControl = new VolumeControlStream(monoStream) {
        Mute = audioTrack.Mute,
        Volume = audioTrack.Volume,
        Balance = audioTrack.Balance
    };

    // when the AudioTrack.Mute property changes, just set it accordingly on the audio stream
    audioTrack.MuteChanged += new EventHandler<ValueEventArgs<bool>>(
        delegate(object vsender, ValueEventArgs<bool> ve) {
            volumeControl.Mute = ve.Value;
        });

    // when the AudioTrack.Solo property changes, we have to react in different ways:
    audioTrack.SoloChanged += new EventHandler<ValueEventArgs<bool>>(
        delegate(object vsender, ValueEventArgs<bool> ve) {
            AudioTrack senderTrack = (AudioTrack)vsender;
            bool isOtherTrackSoloed = false;

            foreach (AudioTrack vaudioTrack in trackList) {
                if (vaudioTrack != senderTrack && vaudioTrack.Solo) {
                    isOtherTrackSoloed = true;
                    break;
                }
            }

            /* if there's at least one other track that is soloed, we set the mute property of
             * the current track to the opposite of the solo property:
             * - if the track is soloed, we unmute it
             * - if the track is unsoloed, we mute it
             */
            if (isOtherTrackSoloed) {
                senderTrack.Mute = !ve.Value;
            }
            /* if this is the only soloed track, we mute all other tracks
             * if this track just got unsoloed, we unmute all other tracks
             */
            else {
                foreach (AudioTrack vaudioTrack in trackList) {
                    if (vaudioTrack != senderTrack && !vaudioTrack.Solo) {
                        vaudioTrack.Mute = ve.Value;
                    }
                }
            }
        });

    // when the AudioTrack.Volume property changes, just set it accordingly on the audio stream
    audioTrack.VolumeChanged += new EventHandler<ValueEventArgs<float>>(
        delegate(object vsender, ValueEventArgs<float> ve) {
            volumeControl.Volume = ve.Value;
        });

    audioTrack.BalanceChanged += new EventHandler<ValueEventArgs<float>>(
        delegate(object vsender, ValueEventArgs<float> ve) {
            volumeControl.Balance = ve.Value;
        });

    audioTrack.InvertedPhaseChanged += new EventHandler<ValueEventArgs<bool>>(
        delegate(object vsender, ValueEventArgs<bool> ve) {
            phaseInversion.Invert = ve.Value;
        });

    audioTrack.MonoDownmixChanged += new EventHandler<ValueEventArgs<bool>>(
        delegate(object vsender, ValueEventArgs<bool> ve) {
            monoStream.Downmix = ve.Value;
        });

    // adjust sample rate to mixer output rate
    ResamplingStream resamplingStream = new ResamplingStream(volumeControl,
        ResamplingQuality.Medium, audioMixer.Properties.SampleRate);

    IAudioStream trackStream = resamplingStream;
    if (trackStream.Properties.Channels == 1 && audioMixer.Properties.Channels > 1) {
        trackStream = new MonoStream(trackStream, audioMixer.Properties.Channels);
    }

    audioMixer.Add(trackStream);
    trackListStreams.Add(audioTrack, trackStream);
}
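// For reference (summarized from the method above), each track passes through this
// decorator chain before it reaches the mixer:
//
//   source -> TolerantStream(BufferedStream) -> OffsetStream
//          -> MonoStream (mono-to-stereo upmix) or SurroundDownmixStream
//          -> PhaseInversionStream -> MonoStream (downmix control)
//          -> VolumeControlStream -> ResamplingStream
//          -> [MonoStream channel match] -> audioMixer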
public void Generate(AudioTrack track) {
    IAudioStream audioStream = new ResamplingStream(
        new MonoStream(AudioStreamFactory.FromFileInfoIeee32(track.FileInfo)),
        ResamplingQuality.Medium, profile.SamplingRate);

    var chroma = new Chroma(audioStream, profile.WindowSize, profile.HopSize, profile.WindowType,
        profile.ChromaMinFrequency, profile.ChromaMaxFrequency, false, profile.ChromaMappingMode);

    float[] chromaFrame;
    var chromaBuffer = new RingBuffer<float[]>(profile.ChromaFilterCoefficients.Length);
    var chromaFilterCoefficients = profile.ChromaFilterCoefficients;
    var filteredChromaFrame = new double[Chroma.Bins];

    var classifiers = profile.Classifiers;
    var maxFilterWidth = classifiers.Max(c => c.Filter.Width);
    var integralImage = new IntegralImage(maxFilterWidth, Chroma.Bins);

    int index = 0;
    int indices = chroma.WindowCount;
    var subFingerprints = new List<SubFingerprint>();

    while (chroma.HasNext()) {
        // Get chroma frame buffer.
        // When the chroma buffer is full, we can take and reuse the oldest array
        chromaFrame = chromaBuffer.Count == chromaBuffer.Length ? chromaBuffer[0] : new float[Chroma.Bins];

        // Read chroma frame into buffer
        chroma.ReadFrame(chromaFrame);

        // ChromaFilter
        chromaBuffer.Add(chromaFrame);
        if (chromaBuffer.Count < chromaBuffer.Length) {
            // Wait for the buffer to fill completely for the filtering to start
            continue;
        }
        Array.Clear(filteredChromaFrame, 0, filteredChromaFrame.Length);
        for (int i = 0; i < chromaFilterCoefficients.Length; i++) {
            var frame = chromaBuffer[i];
            for (int j = 0; j < frame.Length; j++) {
                filteredChromaFrame[j] += frame[j] * chromaFilterCoefficients[i];
            }
        }

        // ChromaNormalizer
        double euclideanNorm = 0;
        for (int i = 0; i < filteredChromaFrame.Length; i++) {
            var value = filteredChromaFrame[i];
            euclideanNorm += value * value;
        }
        euclideanNorm = Math.Sqrt(euclideanNorm);
        if (euclideanNorm < profile.ChromaNormalizationThreshold) {
            Array.Clear(filteredChromaFrame, 0, filteredChromaFrame.Length);
        } else {
            for (int i = 0; i < filteredChromaFrame.Length; i++) {
                filteredChromaFrame[i] /= euclideanNorm;
            }
        }

        // ImageBuilder
        // ... just add one feature vector after another as rows to the image
        integralImage.AddColumn(filteredChromaFrame);

        // FingerprintCalculator
        if (integralImage.Columns < maxFilterWidth) {
            // Wait for the image to fill completely before hashes can be generated
            continue;
        }
        // Calculate subfingerprint hash
        uint hash = 0;
        for (int i = 0; i < classifiers.Length; i++) {
            hash = (hash << 2) | grayCodeMapping[classifiers[i].Classify(integralImage, 0)];
        }
        // We have a SubFingerprint@frameTime
        subFingerprints.Add(new SubFingerprint(index, new SubFingerprintHash(hash), false));

        index++;

        if (index % 512 == 0 && SubFingerprintsGenerated != null) {
            SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(track, subFingerprints, index, indices));
            subFingerprints.Clear();
        }
    }

    if (SubFingerprintsGenerated != null) {
        SubFingerprintsGenerated(this, new SubFingerprintsGeneratedEventArgs(track, subFingerprints, index, indices));
    }
    if (Completed != null) {
        Completed(this, EventArgs.Empty);
    }

    audioStream.Close();
}
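// A sketch (an assumption about grayCodeMapping, which is not shown in the excerpt) of
// how the 2-bit hash packing above typically works: each classifier yields one of four
// classes, which is Gray-coded so that neighboring classes differ in a single bit,
// making the resulting hashes robust to small classification jitter.
static readonly uint[] grayCodeMappingSketch = { 0, 1, 3, 2 }; // class -> 2-bit Gray code
// With e.g. 16 classifiers, 16 * 2 bits fill the 32-bit subfingerprint hash:
// hash = (hash << 2) | grayCodeMappingSketch[classifierResult];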