/// <summary>
/// Initializes this resampler with the given input audio source and output format.
/// Attaches to the given source's event to start resampling as soon as <see cref="IStreamAudioSource.DataAvailable"/> is raised.
/// </summary>
/// <param name="audioSource">The audio source whose output gets resampled.</param>
/// <param name="outputFormat">The format the resampled data is delivered in.</param>
public void Initialize(IStreamAudioSource audioSource, WaveStreamAudioFormat outputFormat)
{
    var adapter = new NAudioStreamAudioSourceToWaveProviderAdapterSimple(audioSource);
    this.WaveProviderAdapter = adapter;
    this.Resampler = new MediaFoundationResampler(adapter, NAudioUtilities.ToNAudioWaveFormat(outputFormat));

    //assign only after the resampler was constructed successfully:
    //if its constructor throws, we must not dispose the input audio source by accident
    this.WrappedAudioSource = audioSource;
    this.Format = outputFormat;

    //forward data from the wrapped source through the resampler
    audioSource.DataAvailable += (sender, args) =>
    {
        //push the incoming samples into our adapter
        WaveProviderAdapter.Write(args);

        //drain the resampler and raise our own output event per chunk
        int bytesRead;
        while ((bytesRead = Resampler.Read(Buffer, 0, Buffer.Length)) > 0)
        {
            DataAvailable?.Invoke(this, new StreamAudioSourceDataEvent()
            {
                Buffer = new ArraySegment<byte>(Buffer, 0, bytesRead),
                Format = Format
            });
        }
    };
}
/// <summary>
/// Initializes this audio source with the given audio device. <see cref="Start"/> may be called afterwards.
/// </summary>
/// <param name="device">The device to capture audio from.</param>
public void SetAudioDevice(AudioDevice device)
{
    this.Device = device;

    var mmDevice = NAudioUtilities.GetDevice(device);
    //capture devices record directly; render devices are captured via WASAPI loopback
    this.Capture = mmDevice.DataFlow == DataFlow.Capture
        ? new WasapiCapture(mmDevice)
        : new WasapiLoopbackCapture(mmDevice);
    this.Format = NAudioUtilities.FromNAudioWaveFormat(this.Capture.WaveFormat);

    //forward non-empty captured buffers to our own event
    this.Capture.DataAvailable += (sender, args) =>
    {
        if (args.BytesRecorded == 0)
        {
            return;
        }
        this.DataAvailable?.Invoke(this, new StreamAudioSourceDataEvent()
        {
            Buffer = new ArraySegment<byte>(args.Buffer, 0, args.BytesRecorded),
            Format = Format
        });
    };

    this.Capture.RecordingStopped += (sender, args) =>
    {
        //map the stop reason: an explicit stop request takes precedence over an exception
        StreamAudioSourceStoppedCause cause;
        if (StopRequested)
        {
            cause = StreamAudioSourceStoppedCause.Stopped;
        }
        else if (args.Exception != null)
        {
            cause = StreamAudioSourceStoppedCause.Exception;
        }
        else
        {
            cause = StreamAudioSourceStoppedCause.Unknown;
        }
        this.Stopped?.Invoke(this, new StreamAudioSourceStoppedEvent()
        {
            Cause = cause,
            Exception = args.Exception
        });
    };
}
/// <summary>
/// Translates the given <see cref="WaveStreamAudioFormat"/> into one of NAudio's <see cref="WaveFormat"/>.
/// </summary>
/// <param name="format">The internal format description to translate.</param>
/// <returns>An equivalent NAudio <see cref="WaveFormat"/>.</returns>
/// <exception cref="ArgumentException">The format is neither int- nor float-encoded.</exception>
public static WaveFormat ToNAudioWaveFormat(WaveStreamAudioFormat format)
{
    if (format.IntEncoded)
    {
        //integer PCM samples
        return new WaveFormat(format.SampleRate, format.BitsPerSample, format.ChannelCount);
    }
    if (format.FloatEncoded)
    {
        //IEEE float samples
        return WaveFormat.CreateIeeeFloatWaveFormat(format.SampleRate, format.ChannelCount);
    }
    throw new ArgumentException($"Unknown WaveStreamAudioFormat encoding: neither IntEncoded nor FloatEncoded");
}
/// <summary>
/// Translates the given NAudio Wave format to our internal <see cref="WaveStreamAudioFormat"/>.
/// </summary>
/// <param name="format">The NAudio format to translate.</param>
/// <returns>An equivalent internal <see cref="WaveStreamAudioFormat"/>.</returns>
/// <exception cref="ArgumentException">The encoding is neither PCM nor IEEE float.</exception>
public static WaveStreamAudioFormat FromNAudioWaveFormat(WaveFormat format)
{
    switch (format.Encoding)
    {
        case WaveFormatEncoding.Pcm:
            return WaveStreamAudioFormat.GetIntFormat(format.SampleRate, format.BitsPerSample, format.Channels);
        case WaveFormatEncoding.IeeeFloat:
            return WaveStreamAudioFormat.GetFloatFormat(format.SampleRate, format.BitsPerSample, format.Channels);
        default:
            throw new ArgumentException($"Unknown NAudio WaveFormatEncoding: {format.Encoding}");
    }
}
/// <summary>
/// Returns the sample with endianness correction if required:
/// the returned endianness matches <see cref="BitConverter.IsLittleEndian"/>.
/// </summary>
/// <param name="sample">The raw bytes of a single sample.</param>
/// <param name="format">The format the sample is encoded in.</param>
/// <returns>The sample bytes in machine byte order.</returns>
public static ArraySegment<byte> GetSampleBytes(ArraySegment<byte> sample, WaveStreamAudioFormat format)
{
    //fast path: the sample already matches the machine's endianness
    if (format.ByteOrderLittleEndian == BitConverter.IsLittleEndian)
    {
        return sample;
    }

    //copy the sample's bytes in reverse order
    int byteCount = format.BitsPerSample / 8;
    var reversed = new byte[byteCount];
    for (int target = 0; target < byteCount; ++target)
    {
        reversed[target] = sample[byteCount - 1 - target];
    }
    return reversed;
}
/// <summary>
/// Constructs and initializes a resampler for the given input source and target format.
/// Returns null if no resampler could be initialized.
/// </summary>
/// <param name="input">The audio source to resample.</param>
/// <param name="outputFormat">The desired output format.</param>
/// <returns>An initialized resampler, or null.</returns>
public IStreamAudioResampler GetResampler(IStreamAudioSource input, WaveStreamAudioFormat outputFormat)
{
    //try MediaFoundation first (Windows)
    if (ServiceProvider.GetService(typeof(NAudioMediaFoundationStreamAudioResampler)) is NAudioMediaFoundationStreamAudioResampler resampler)
    {
        try
        {
            resampler.Initialize(input, outputFormat);
            return resampler;
        }
        catch (Exception ex)
        {
            //initialization failed (e.g. MediaFoundation unavailable): clean up and fall through
            Logger.LogError(ex, "Could not initialize NAudio MediaFoundation resampler");
            resampler.Dispose();
        }
    }
    return null;
}
/// <summary>
/// See <see cref="GetAudioConfig"/>: constructs a <see cref="IStreamAudioSource"/> for the given audio source (usually an <see cref="AudioDevice"/>).
/// Returns null if no <see cref="IStreamAudioSource"/> can be constructed from this source.
/// </summary>
/// <param name="configSource">The configured audio source (stream, blob or device).</param>
/// <returns>A stream audio source meeting the speech service's format requirements, or null.</returns>
protected IStreamAudioSource GetStreamAudioSource(IAudioSource configSource)
{
    IStreamAudioSource streamSource = configSource as IStreamAudioSource;
    if (streamSource != null)
    {
        //take care to keep the original stream intact
        streamSource = new NonDisposingStreamAudioSource(streamSource);
    }
    else
    {
        //read from blob/device
        var provider = ServiceProvider.GetService(typeof(IStreamAudioSourceProvider)) as IStreamAudioSourceProvider;
        streamSource = provider?.GetStreamAudioSource(configSource);
    }
    if (streamSource == null)
    {
        Logger.LogError($"Cannot read from AudioDevice: could not construct an IStreamAudioSource");
        return null;
    }

    //add a noisegate to avoid unnecessary costs when there's no actual sound. probably best to do that before resampling => saves some processing power
    var noiseGateProvider = ServiceProvider.GetService(typeof(INoiseGateStreamAudioProcessProvider)) as INoiseGateStreamAudioProcessProvider;
    var noiseGate = noiseGateProvider?.GetNoiseGate(streamSource);
    if (noiseGate != null)
    {
        noiseGate.SetOptions(new NoiseGateStreamAudioProcessorOptions()
        {
            VolumeThreshold = Config.VolumeThreshold,
            Delay = TimeSpan.FromSeconds(5),
            DelayStopDetection = TimeSpan.FromSeconds(2)
        });
        streamSource = noiseGate;
        StreamAudioNoiseGate = noiseGate;
    }
    else
    {
        //fixed: this message previously contained a raw line break inside the interpolated string literal
        Logger.LogWarning($"No noisegate available. Input format: {streamSource.Format}");
    }

    //check on the wave format
    //according to https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/audio-processing-overview#minimum-requirements-to-use-microsoft-audio-stack
    //-we need multiples of 16kHz
    //-"32-bit IEEE little endian float, 32-bit little endian signed int, 24-bit little endian signed int, 16-bit little endian signed int, or 8-bit signed int"
    //--(though the API doesn't seem to allow float formats)
    //additionally: more than one channel increases the API cost
    var sourceFormat = streamSource.Format;
    bool resampleRequired = sourceFormat.SampleRate % 16000 != 0 ||
        !sourceFormat.IntEncoded ||
        !sourceFormat.IntEncodingSigned ||
        !sourceFormat.ByteOrderLittleEndian ||
        sourceFormat.ChannelCount > 1;
    if (resampleRequired)
    {
        //get a resampler.
        //round the sample rate down to a multiple of 16kHz but never below 16kHz:
        //the previous "SampleRate / 16000 * 16000" produced an invalid 0Hz target for sub-16kHz sources (e.g. 8kHz telephony)
        //likewise, clamp the sample size to the 8..32 bit range (it could previously become 0 for sub-8-bit sources)
        WaveStreamAudioFormat targetFormat = WaveStreamAudioFormat.GetIntFormat(
            sampleRate: Math.Max(1, sourceFormat.SampleRate / 16000) * 16000,
            bitsPerSample: Math.Min(Math.Max(8, sourceFormat.BitsPerSample / 8 * 8), 32),
            channelCount: 1,
            signed: true,
            littleEndian: true
        );
        var provider = ServiceProvider.GetService(typeof(IStreamAudioResamplerProvider)) as IStreamAudioResamplerProvider;
        var resampler = provider?.GetResampler(streamSource, targetFormat);
        if (resampler != null)
        {
            streamSource = resampler;
        }
        else
        {
            //can't resample
            Logger.LogError($"No resampler available. Input format: {sourceFormat}");
            streamSource.Dispose();
            return null;
        }
    }
    return streamSource;
}
/// <summary>
/// Calculates the length of the given samples. I.e., how long a capture device would take to produce that many samples.
/// </summary>
/// <param name="samples">The total number of (per-channel) samples.</param>
/// <param name="format">The wave format the samples are encoded in.</param>
/// <returns>The duration covered by the samples.</returns>
public static TimeSpan SamplesToTime(long samples, WaveStreamAudioFormat format)
{
    //samples counts individual per-channel samples, so divide by both the rate and the channel count
    double seconds = (double)samples / format.SampleRate / format.ChannelCount;
    return TimeSpan.FromSeconds(seconds);
}
/// <summary>
/// Analyzes the given sample and maps it to the range of <see cref="NoiseGateStreamAudioProcessorOptions.VolumeThreshold"/>.
/// </summary>
/// <param name="sample">The raw bytes of a single sample.</param>
/// <param name="format">The wave format the sample is encoded in.</param>
/// <returns>The sample's magnitude scaled to 0..100 (100 = full scale, 0dB).</returns>
/// <exception cref="ArgumentException">The format's encoding or sample size is not supported.</exception>
public static float GetNormalizedSampleValue(ArraySegment<byte> sample, WaveStreamAudioFormat format)
{
    //normalize the byte order to the machine's endianness first
    sample = GetSampleBytes(sample, format);

    if (format.FloatEncoded)
    {
        if (format.BitsPerSample == 32)
        {
            float f = BitConverter.ToSingle(sample);
            //1 = 0dB = highest volume without distortion
            return Math.Abs(f) * 100;
        }
        throw new ArgumentException($"Unsupported float encoding: BitsPerSample={format.BitsPerSample}");
    }
    else if (format.IntEncoded)
    {
        uint value;
        uint max;
        if (format.BitsPerSample == 32)
        {
            max = int.MaxValue;
            if (format.IntEncodingSigned)
            {
                int i = BitConverter.ToInt32(sample);
                //avoid the Math.Abs overflow for the most negative value
                if (i == int.MinValue) { ++i; }
                value = (uint)Math.Abs(i);
            }
            else
            {
                value = BitConverter.ToUInt32(sample);
            }
        }
        else if (format.BitsPerSample == 16)
        {
            max = (uint)short.MaxValue;
            if (format.IntEncodingSigned)
            {
                short i = BitConverter.ToInt16(sample);
                if (i == short.MinValue) { ++i; }
                value = (uint)Math.Abs(i);
            }
            else
            {
                value = BitConverter.ToUInt16(sample);
            }
        }
        else if (format.BitsPerSample == 8)
        {
            max = (uint)sbyte.MaxValue;
            if (format.IntEncodingSigned)
            {
                //fixed: read the byte as a signed sbyte. reading it unsigned meant negative
                //samples (e.g. 0xFF = -1) were treated as large positive values (255 > max)
                int i = (sbyte)sample[0];
                if (i == sbyte.MinValue) { ++i; }
                value = (uint)Math.Abs(i);
            }
            else
            {
                value = sample[0];
            }
        }
        else if (format.BitsPerSample == 24)
        {
            //largest positive signed 24-bit value (2^23 - 1), consistent with the other widths.
            //the old value 16777215 (2^24 - 1) inverted the unsigned normalization below
            max = 8388607;
            //assemble the 24-bit value from its (machine-endian) bytes
            int i;
            if (BitConverter.IsLittleEndian)
            {
                i = (sample[2] << 16) | (sample[1] << 8) | sample[0];
            }
            else
            {
                i = (sample[0] << 16) | (sample[1] << 8) | sample[2];
            }
            if (format.IntEncodingSigned)
            {
                //sign-extend: bit 23 is the sign bit of a 24-bit two's-complement value.
                //(the old code tested bit 24 — never set — and OR'ed 0xFF, which would have
                //corrupted the low byte instead of extending the sign)
                if ((i & (1 << 23)) != 0)
                {
                    i |= unchecked((int)0xFF000000);
                }
                if (i == -8388608) { ++i; }
                value = (uint)Math.Abs(i);
            }
            else
            {
                value = (uint)i;
            }
        }
        else
        {
            throw new ArgumentException($"Unsupported int encoding: BitsPerSample={format.BitsPerSample}, Signed={format.IntEncodingSigned}");
        }

        if (!format.IntEncodingSigned)
        {
            //unsigned samples are centered around 'max': shift so that a "middle" sample = 0 = min value
            if (value < max)
            {
                value = max + (max - value);
            }
            value -= max;
            if (value >= max)
            {
                value = max;
            }
        }

        //100 must be a double. the mantissa of a float isn't large enough, thus we might get some unexpected results when value is close to max
        return (float)(value / (max / 100.0));
    }
    throw new ArgumentException($"Unsupported wave encoding: neither int nor float");
}