/// <summary>
/// Resolves an <see cref="IStreamAudioSource"/> for the given audio source.
/// Returns the source itself if it already streams, otherwise asks the matching
/// provider service for devices or blobs. Returns null if nothing can stream this source.
/// </summary>
/// <param name="source">The audio source to resolve.</param>
/// <returns>A stream audio source, or null if none is available.</returns>
public IStreamAudioSource GetStreamAudioSource(IAudioSource source)
{
    //already a streaming source? use it directly
    if (source is IStreamAudioSource alreadyStreaming)
    {
        return alreadyStreaming;
    }

    //a capture device: delegate to the device provider, if registered
    if (source is AudioDevice audioDevice)
    {
        var deviceProvider = ServiceProvider.GetService(typeof(IDeviceStreamAudioSourceProvider)) as IDeviceStreamAudioSourceProvider;
        var fromDevice = deviceProvider?.GetStreamAudioSource(audioDevice);
        if (fromDevice != null)
        {
            return fromDevice;
        }
    }

    //a stored blob: delegate to the blob provider, if registered
    if (source is AudioBlob audioBlob)
    {
        var blobProvider = ServiceProvider.GetService(typeof(IBlobStreamAudioSourceProvider)) as IBlobStreamAudioSourceProvider;
        var fromBlob = blobProvider?.GetStreamAudioSource(audioBlob);
        if (fromBlob != null)
        {
            return fromBlob;
        }
    }

    //no provider could handle this source
    return null;
}
/// <summary>
/// Initializes this resampler with the given input audio source and output format.
/// Attaches to the given source's event to start resampling as soon as
/// <see cref="IStreamAudioSource.DataAvailable"/> is raised.
/// </summary>
/// <param name="audioSource">The source whose audio will be resampled.</param>
/// <param name="outputFormat">The format the resampled audio should have.</param>
public void Initialize(IStreamAudioSource audioSource, WaveStreamAudioFormat outputFormat)
{
    WaveProviderAdapter = new NAudioStreamAudioSourceToWaveProviderAdapterSimple(audioSource);
    Resampler = new MediaFoundationResampler(WaveProviderAdapter, NAudioUtilities.ToNAudioWaveFormat(outputFormat));

    //assign only after the resampler was constructed successfully:
    //if the constructor threw, we must not dispose the input source by accident
    WrappedAudioSource = audioSource;
    Format = outputFormat;

    //forward data from the wrapped source through the resampler
    audioSource.DataAvailable += (sender, args) =>
    {
        //feed the incoming chunk into our adapter
        WaveProviderAdapter.Write(args);

        //drain the resampler and raise our own event for every converted chunk
        int bytesRead;
        while ((bytesRead = Resampler.Read(Buffer, 0, Buffer.Length)) > 0)
        {
            DataAvailable?.Invoke(this, new StreamAudioSourceDataEvent()
            {
                Buffer = new ArraySegment<byte>(Buffer, 0, bytesRead),
                Format = Format
            });
        }
    };
}
/// <summary>
/// Constructs an <see cref="AudioConfig"/> from <see cref="Config"/>.
/// Depending on the available services, this may either use the audio features built into the Speech SDK
/// (such as <see cref="AudioConfig.FromDefaultMicrophoneInput"/>), or it may construct a
/// <see cref="IStreamAudioSource"/> that accesses the requested <see cref="AudioDevice"/>
/// with resampling and noise gates as required.
/// </summary>
/// <returns>An audio configuration, or null if none could be constructed.</returns>
protected AudioConfig GetAudioConfig()
{
    var streamSource = GetStreamAudioSource(Config.AudioSource);
    if (streamSource != null)
    {
        //we have a streaming source: bridge it into an Azure push stream
        try
        {
            var format = streamSource.Format;
            var azureInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(
                (uint)format.SampleRate,
                (byte)format.BitsPerSample,
                (byte)format.ChannelCount));

            //reusable scratch buffer for GetArray; allocated lazily by the extension
            byte[] bufferOptional = null;
            streamSource.DataAvailable += (sender, args) =>
            {
                azureInput.Write(args.Buffer.GetArray(ref bufferOptional), args.Buffer.Count);
            };
            streamSource.Stopped += (sender, args) =>
            {
                if (args.Cause == StreamAudioSourceStoppedCause.Stopped)
                {
                    //signal end-of-stream to Azure
                    azureInput.Close();
                }
            };

            this.StreamAudioSource = streamSource;
            return AudioConfig.FromStreamInput(azureInput);
        }
        catch (Exception ex)
        {
            Logger.LogError(ex, $"Error while creating an Azure AudioConfig from an IStreamAudioSource. Format: SampleRate={streamSource.Format.SampleRate}, BitsPerSample={streamSource.Format.BitsPerSample}, Channels={streamSource.Format.ChannelCount}");
            streamSource.Dispose();
        }
    }

    //streaming path failed or was unavailable
    this.StreamAudioSource = null;
    this.StreamAudioNoiseGate = null;

    //fall back to the audio engine built into the Speech SDK
    if (Config.AudioSource is AudioDevice audioDevice && audioDevice.UseDefaultAudioInputDevice)
    {
        return AudioConfig.FromDefaultMicrophoneInput();
    }

    return null;
}
/// <summary>
/// Constructs a noise gate for the given audio source, if its wave format is supported
/// (32-bit float, or signed ints of at most 32 bits in whole-byte sizes).
/// Returns null for unsupported formats or when no noise gate service is registered.
/// </summary>
/// <param name="audioSource">The source the noise gate should process.</param>
/// <returns>An initialized noise gate, or null.</returns>
public INoiseGateStreamAudioProcessor GetNoiseGate(IStreamAudioSource audioSource)
{
    //check the input format
    var format = audioSource.Format;
    if (format.FloatEncoded)
    {
        if (format.BitsPerSample != 32)
        {
            Logger.LogInformation($"Unsupported float size: {format.BitsPerSample}");
            return null;
        }
    }
    else if (format.IntEncoded)
    {
        if (format.BitsPerSample > 32 || (format.BitsPerSample % 8) != 0)
        {
            Logger.LogInformation($"Unsupported int size: {format.BitsPerSample}");
            return null;
        }
    }
    else
    {
        //fixed typo: "enconding" -> "encoding"
        Logger.LogInformation($"Unsupported wave encoding: neither float nor int");
        return null;
    }

    var noiseGate = ServiceProvider.GetService(typeof(DefaultNoiseGateStreamAudioProcessor)) as DefaultNoiseGateStreamAudioProcessor;
    try
    {
        noiseGate?.SetAudioSource(audioSource);
        return noiseGate;
    }
    catch (Exception e)
    {
        Logger.LogError(e, "Could not initialize default noise gate");
        //use a soft cast: a hard (IDisposable) cast would throw InvalidCastException
        //inside the catch block if the processor type is not disposable
        (noiseGate as IDisposable)?.Dispose();
        return null;
    }
}
/// <summary>
/// Constructs a resampler that converts the given input to the given output format.
/// Currently tries the NAudio MediaFoundation implementation (Windows only).
/// </summary>
/// <param name="input">The audio source to resample.</param>
/// <param name="outputFormat">The desired output format.</param>
/// <returns>An initialized resampler, or null if none is available.</returns>
public IStreamAudioResampler GetResampler(IStreamAudioSource input, WaveStreamAudioFormat outputFormat)
{
    //try MediaFoundation first (Windows)
    if (ServiceProvider.GetService(typeof(NAudioMediaFoundationStreamAudioResampler)) is NAudioMediaFoundationStreamAudioResampler mediaFoundationResampler)
    {
        try
        {
            mediaFoundationResampler.Initialize(input, outputFormat);
            return mediaFoundationResampler;
        }
        catch (Exception ex)
        {
            Logger.LogError(ex, "Could not initialize NAudio MediaFoundation resampler");
            mediaFoundationResampler.Dispose();
        }
    }

    //no usable resampler implementation
    return null;
}
/// <summary>
/// Constructs the adapter, deriving the NAudio <c>WaveFormat</c> from the source's format.
/// NOTE(review): the source reference itself is not stored here — presumably audio data is
/// pushed into this adapter later via a Write call (see the resampler's DataAvailable handler);
/// confirm against the rest of this class.
/// </summary>
/// <param name="source">The stream audio source whose format this adapter exposes.</param>
public NAudioStreamAudioSourceToWaveProviderAdapterSimple(IStreamAudioSource source) { this.WaveFormat = NAudioUtilities.ToNAudioWaveFormat(source.Format); }
/// <summary>
/// Constructs a wrapper around the given source. The wrapper is used to shield an
/// externally-owned source from disposal by this pipeline (see its use in GetStreamAudioSource).
/// </summary>
/// <param name="audioSource">The source to wrap; ownership stays with the caller.</param>
public NonDisposingStreamAudioSource(IStreamAudioSource audioSource) { this.WrappedAudioSource = audioSource; }
/// <summary>
/// See <see cref="GetAudioConfig"/>: constructs a <see cref="IStreamAudioSource"/> for the given audio source
/// (usually an <see cref="AudioDevice"/>), adding a noise gate and a resampler as required.
/// Returns null if no <see cref="IStreamAudioSource"/> can be constructed from this source.
/// </summary>
/// <param name="configSource">The configured audio source.</param>
/// <returns>A ready-to-use stream source, or null.</returns>
protected IStreamAudioSource GetStreamAudioSource(IAudioSource configSource)
{
    IStreamAudioSource streamSource = configSource as IStreamAudioSource;
    if (streamSource != null)
    {
        //take care to keep the original stream intact
        streamSource = new NonDisposingStreamAudioSource(streamSource);
    }
    else
    {
        //read from blob/device
        var provider = ServiceProvider.GetService(typeof(IStreamAudioSourceProvider)) as IStreamAudioSourceProvider;
        streamSource = provider?.GetStreamAudioSource(configSource);
    }
    if (streamSource == null)
    {
        Logger.LogError($"Cannot read from AudioDevice: could not construct an IStreamAudioSource");
        return null;
    }

    //add a noisegate to avoid unnecessary costs when there's no actual sound. probably best to do that before resampling => saves some processing power
    var noiseGateProvider = ServiceProvider.GetService(typeof(INoiseGateStreamAudioProcessProvider)) as INoiseGateStreamAudioProcessProvider;
    var noiseGate = noiseGateProvider?.GetNoiseGate(streamSource);
    if (noiseGate != null)
    {
        noiseGate.SetOptions(new NoiseGateStreamAudioProcessorOptions()
        {
            VolumeThreshold = Config.VolumeThreshold,
            Delay = TimeSpan.FromSeconds(5),
            DelayStopDetection = TimeSpan.FromSeconds(2)
        });
        streamSource = noiseGate;
        StreamAudioNoiseGate = noiseGate;
    }
    else
    {
        //fixed: this message was split across a physical newline inside a non-verbatim
        //string literal, which does not compile; rejoined into a single line
        Logger.LogWarning($"No noisegate available. Input format: {streamSource.Format}");
    }

    //check on the wave format
    //according to https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/audio-processing-overview#minimum-requirements-to-use-microsoft-audio-stack
    //-we need multiples of 16kHz
    //-"32-bit IEEE little endian float, 32-bit little endian signed int, 24-bit little endian signed int, 16-bit little endian signed int, or 8-bit signed int"
    //--(though the API doesn't seem to allow float formats)
    //additionally: more than one channel increases the API cost
    var sourceFormat = streamSource.Format;
    bool resampleRequired = sourceFormat.SampleRate % 16000 != 0 ||
        !sourceFormat.IntEncoded ||
        !sourceFormat.IntEncodingSigned ||
        !sourceFormat.ByteOrderLittleEndian ||
        sourceFormat.ChannelCount > 1;
    if (resampleRequired)
    {
        //get a resampler.
        //fixed: for sub-16kHz sources (e.g. 8000 Hz), SampleRate / 16000 * 16000 evaluated
        //to 0, producing an invalid target format; clamp to at least 16 kHz.
        //likewise clamp bits-per-sample to at least 8 for sub-byte sample sizes.
        WaveStreamAudioFormat targetFormat = WaveStreamAudioFormat.GetIntFormat(
            sampleRate: Math.Max(16000, sourceFormat.SampleRate / 16000 * 16000),
            bitsPerSample: Math.Min(Math.Max(8, sourceFormat.BitsPerSample / 8 * 8), 32),
            channelCount: 1,
            signed: true,
            littleEndian: true
        );
        var provider = ServiceProvider.GetService(typeof(IStreamAudioResamplerProvider)) as IStreamAudioResamplerProvider;
        var resampler = provider?.GetResampler(streamSource, targetFormat);
        if (resampler != null)
        {
            streamSource = resampler;
        }
        else
        {
            //can't resample
            Logger.LogError($"No resampler available. Input format: {sourceFormat}");
            streamSource.Dispose();
            return null;
        }
    }

    return streamSource;
}
/// <summary>
/// Constructs a wrapper that associates a stream audio source with the blob it was created from.
/// </summary>
/// <param name="wrappedAudioSource">The stream source reading the blob's audio data.</param>
/// <param name="blob">The originating audio blob.</param>
public BlobStreamAudioWrapper(IStreamAudioSource wrappedAudioSource, AudioBlob blob) { WrappedAudioSource = wrappedAudioSource; Blob = blob; }
/// <summary>
/// Attaches this processor to the given source and forwards its data events
/// into <c>OnSourceDataAvailable</c>.
/// </summary>
/// <param name="source">The source to process.</param>
public void SetAudioSource(IStreamAudioSource source)
{
    WrappedAudioSource = source;
    source.DataAvailable += (sender, args) => OnSourceDataAvailable(args);
}