/// <summary>
/// Constructs an <see cref="AudioConfig"/> from <see cref="Config"/>.
/// Depending on the available services, this may either use the audio features built into the Speech SDK
/// (such as <see cref="AudioConfig.FromDefaultMicrophoneInput"/>), or it may construct a
/// <see cref="IStreamAudioSource"/> that accesses the requested <see cref="AudioDevice"/> with resampling
/// and noise gates as required.
/// </summary>
/// <returns>The constructed <see cref="AudioConfig"/>, or null if no usable audio input is available.</returns>
protected AudioConfig GetAudioConfig()
{
    var streamSource = GetStreamAudioSource(Config.AudioSource);
    if (streamSource != null)
    {
        //use this stream source and convert to an Azure audio stream
        //declared outside the try so the catch can dispose it if construction fails partway
        PushAudioInputStream azureInput = null;
        try
        {
            azureInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(
                (uint)streamSource.Format.SampleRate,
                (byte)streamSource.Format.BitsPerSample,
                (byte)streamSource.Format.ChannelCount));

            //pump audio from the stream source into the Azure push stream
            byte[] bufferOptional = null;
            streamSource.DataAvailable += (s, e) =>
            {
                azureInput.Write(e.Buffer.GetArray(ref bufferOptional), e.Buffer.Count);
            };
            streamSource.Stopped += (s, e) =>
            {
                if (e.Cause == StreamAudioSourceStoppedCause.Stopped)
                {
                    //signal end-of-stream to Azure
                    azureInput.Close();
                }
            };

            this.StreamAudioSource = streamSource;
            return AudioConfig.FromStreamInput(azureInput);
        }
        catch (Exception ex)
        {
            Logger.LogError(ex, $"Error while creating an Azure AudioConfig from an IStreamAudioSource. Format: SampleRate={streamSource.Format.SampleRate}, BitsPerSample={streamSource.Format.BitsPerSample}, Channels={streamSource.Format.ChannelCount}");
            //clean up the partially constructed input stream before falling back to the built-in engine
            azureInput?.Dispose();
            streamSource.Dispose();
        }
    }

    //no custom stream: clear any state a failed attempt may have left behind
    this.StreamAudioSource = null;
    this.StreamAudioNoiseGate = null;

    //try and use the built-in audio engine
    if (Config.AudioSource is AudioDevice audioDevice)
    {
        if (audioDevice.UseDefaultAudioInputDevice)
        {
            return AudioConfig.FromDefaultMicrophoneInput();
        }
    }

    return null;
}
/// <summary>
/// See <see cref="GetAudioConfig"/>: constructs a <see cref="IStreamAudioSource"/> for the given audio source
/// (usually an <see cref="AudioDevice"/>).
/// Returns null if no <see cref="IStreamAudioSource"/> can be constructed from this source.
/// </summary>
/// <param name="configSource">The configured audio source to read from; may itself be an <see cref="IStreamAudioSource"/>.</param>
/// <returns>A stream source, possibly wrapped in a noise gate and/or resampler, or null on failure.</returns>
protected IStreamAudioSource GetStreamAudioSource(IAudioSource configSource)
{
    IStreamAudioSource streamSource = configSource as IStreamAudioSource;
    if (streamSource != null)
    {
        //take care to keep the original stream intact
        streamSource = new NonDisposingStreamAudioSource(streamSource);
    }
    else
    {
        //read from blob/device
        var provider = ServiceProvider.GetService(typeof(IStreamAudioSourceProvider)) as IStreamAudioSourceProvider;
        streamSource = provider?.GetStreamAudioSource(configSource);
    }
    if (streamSource == null)
    {
        Logger.LogError($"Cannot read from AudioDevice: could not construct an IStreamAudioSource");
        return null;
    }

    //add a noisegate to avoid unnecessary costs when there's no actual sound.
    //probably best to do that before resampling => saves some processing power
    var noiseGateProvider = ServiceProvider.GetService(typeof(INoiseGateStreamAudioProcessProvider)) as INoiseGateStreamAudioProcessProvider;
    var noiseGate = noiseGateProvider?.GetNoiseGate(streamSource);
    if (noiseGate != null)
    {
        noiseGate.SetOptions(new NoiseGateStreamAudioProcessorOptions()
        {
            VolumeThreshold = Config.VolumeThreshold,
            Delay = TimeSpan.FromSeconds(5),
            DelayStopDetection = TimeSpan.FromSeconds(2)
        });
        streamSource = noiseGate;
        StreamAudioNoiseGate = noiseGate;
    }
    else
    {
        Logger.LogWarning($"No noisegate available. Input format: {streamSource.Format}");
    }

    //check on the wave format
    //according to https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/audio-processing-overview#minimum-requirements-to-use-microsoft-audio-stack
    //-we need multiples of 16kHz
    //-"32-bit IEEE little endian float, 32-bit little endian signed int, 24-bit little endian signed int, 16-bit little endian signed int, or 8-bit signed int"
    //--(though the API doesn't seem to allow float formats)
    //additionally: more than one channel increases the API cost
    var sourceFormat = streamSource.Format;
    bool resampleRequired = sourceFormat.SampleRate % 16000 != 0 ||
        !sourceFormat.IntEncoded ||
        !sourceFormat.IntEncodingSigned ||
        !sourceFormat.ByteOrderLittleEndian ||
        sourceFormat.ChannelCount > 1;
    if (resampleRequired)
    {
        //get a resampler.
        //round the rate down to a multiple of 16kHz but never below 16kHz: plain integer division
        //would yield 0 for sources below 16kHz (e.g. 8kHz telephony input).
        //likewise clamp the sample size into the supported 8..32 bit range.
        WaveStreamAudioFormat targetFormat = WaveStreamAudioFormat.GetIntFormat(
            sampleRate: Math.Max(16000, sourceFormat.SampleRate / 16000 * 16000),
            bitsPerSample: Math.Min(Math.Max(8, sourceFormat.BitsPerSample / 8 * 8), 32),
            channelCount: 1,
            signed: true,
            littleEndian: true
        );
        var provider = ServiceProvider.GetService(typeof(IStreamAudioResamplerProvider)) as IStreamAudioResamplerProvider;
        var resampler = provider?.GetResampler(streamSource, targetFormat);
        if (resampler != null)
        {
            streamSource = resampler;
        }
        else
        {
            //can't resample
            Logger.LogError($"No resampler available. Input format: {sourceFormat}");
            streamSource.Dispose();
            return null;
        }
    }

    return streamSource;
}