/// <summary>
/// Initializes a new instance of the <see cref="GoogleSpeak"/> class.
/// </summary>
/// <param name="pipeline">The pipeline to add the component to.</param>
/// <param name="format">The audio format of the synthesized speech.</param>
/// <param name="languageCode">The language code to use for speech synthesis.</param>
public GoogleSpeak(Pipeline pipeline, Microsoft.Psi.Audio.WaveFormat format, string languageCode)
    : base(pipeline)
{
    this.gClient = TextToSpeechClient.Create();
    this.format = format;
    this.TextLanguageCode = languageCode;

    if (format.Equals(Microsoft.Psi.Audio.WaveFormat.Create16kHz1Channel16BitPcm()))
    {
        this.googleAudioFormat = AudioEncoding.Linear16;
    }
}
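A minimal usage sketch follows; only the constructor signature is taken from the code above, and the surrounding pipeline wiring (how text is fed in and audio consumed) is assumed rather than shown by the source.

// Hypothetical call site: construct the component inside a \psi pipeline.
using (var pipeline = Pipeline.Create())
{
    // Request 16 kHz, 1-channel, 16-bit PCM so the component selects AudioEncoding.Linear16.
    var tts = new GoogleSpeak(pipeline, Microsoft.Psi.Audio.WaveFormat.Create16kHz1Channel16BitPcm(), "en-US");

    // ... connect a stream of text to the component and consume its audio output here ...

    pipeline.Run();
}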
/// <summary> /// Initializes a new instance of the <see cref="AudioBuffer"/> structure of a pre-allocated fixed length containing /// no data initially. This overload is used primarily for making a copy of an existing <see cref="AudioBuffer"/>. /// </summary> /// <param name="length">The size in bytes of the audio data.</param> /// <param name="format">The audio format.</param> public AudioBuffer(int length, WaveFormat format) : this(new byte[length], format) { }
/// <summary> /// Initializes a new instance of the <see cref="AudioCapture"/> class with a specified output format and device name. /// </summary> /// <param name="pipeline">The pipeline to add the component to.</param> /// <param name="outputFormat">The output format to use.</param> /// <param name="deviceName">The name of the audio device.</param> public AudioCapture(Pipeline pipeline, WaveFormat outputFormat, string deviceName = "plughw:0,0") : this(pipeline, new AudioCaptureConfiguration() { Format = outputFormat, DeviceName = deviceName }) { }
/// <summary>
/// Called when the pipeline is shutting down.
/// </summary>
private void OnPipelineStop()
{
    this.sourceFormat = null;
}
/// <summary> /// Initializes a new instance of the <see cref="AudioBuffer"/> structure. /// </summary> /// <param name="data">An array of bytes containing the audio data.</param> /// <param name="format">The audio format.</param> public AudioBuffer(byte[] data, WaveFormat format) { this.data = data; this.format = format; }
/// <summary>
/// Initialize the capturer.
/// </summary>
/// <param name="engineLatency">
/// Number of milliseconds of acceptable lag between live sound being produced and recording operation.
/// </param>
/// <param name="engineBuffer">
/// Number of milliseconds of audio that may be buffered between reads.
/// </param>
/// <param name="gain">
/// The gain to be applied to the audio after capture.
/// </param>
/// <param name="outFormat">
/// The format of the audio to be captured. If this is NULL, the default audio format of the
/// capture device will be used.
/// </param>
/// <param name="callback">
/// Callback function delegate which will handle the captured data.
/// </param>
/// <param name="speech">
/// If true, sets the audio category to speech to optimize audio pipeline for speech recognition.
/// </param>
public void Initialize(int engineLatency, int engineBuffer, float gain, WaveFormat outFormat, AudioDataAvailableCallback callback, bool speech)
{
    // Create our shutdown event - we want a manual reset event that starts in the not-signaled state.
    this.shutdownEvent = new ManualResetEvent(false);

    // Now activate an IAudioClient object on our preferred endpoint and retrieve the mix format for that endpoint.
    object obj = this.endpoint.Activate(ref audioClientIID, ClsCtx.INPROC_SERVER, IntPtr.Zero);
    this.audioClient = (IAudioClient)obj;

    // The following block enables advanced mic array APO pipeline on Windows 10 RS2 builds >= 15004.
    // This must be called before the call to GetMixFormat() in LoadFormat().
    if (speech)
    {
        IAudioClient2 audioClient2 = (IAudioClient2)this.audioClient;
        if (audioClient2 != null)
        {
            AudioClientProperties properties = new AudioClientProperties
            {
                Size = Marshal.SizeOf<AudioClientProperties>(),
                Category = AudioStreamCategory.Speech,
            };

            int hr = audioClient2.SetClientProperties(ref properties);
            if (hr != 0)
            {
                Console.WriteLine("Failed to set audio stream category to AudioCategory_Speech: {0}", hr);
            }
        }
        else
        {
            Console.WriteLine("Unable to get IAudioClient2 interface");
        }
    }

    // Load the MixFormat. This may differ depending on the shared mode used.
    this.LoadFormat();

    // Remember our configured latency and buffer size
    this.engineLatencyInMs = engineLatency;
    this.engineBufferInMs = engineBuffer;

    // Set the gain
    this.gain = gain;

    // Determine whether or not we need a resampler
    this.resampler = null;

    if (outFormat != null)
    {
        // Check if the desired format is supported
        IntPtr closestMatchPtr;
        IntPtr outFormatPtr = WaveFormat.MarshalToPtr(outFormat);
        int hr = this.audioClient.IsFormatSupported(AudioClientShareMode.Shared, outFormatPtr, out closestMatchPtr);

        // Free outFormatPtr to prevent leaking memory
        Marshal.FreeHGlobal(outFormatPtr);

        if (hr == 0)
        {
            // Replace _MixFormat with outFormat. Since it is supported, we will initialize
            // the audio capture client with that format and capture without resampling.
            this.mixFormat = outFormat;
            this.mixFrameSize = (this.mixFormat.BitsPerSample / 8) * this.mixFormat.Channels;
            this.InitializeAudioEngine();
        }
        else
        {
            // In all other cases, we need to resample to OutFormat
            if ((hr == 1) && (closestMatchPtr != IntPtr.Zero))
            {
                // Use closest match suggested by IsFormatSupported() and resample
                this.mixFormat = WaveFormat.MarshalFromPtr(closestMatchPtr);
                this.mixFrameSize = (this.mixFormat.BitsPerSample / 8) * this.mixFormat.Channels;

                // Free closestMatchPtr to prevent leaking memory
                Marshal.FreeCoTaskMem(closestMatchPtr);
            }

            // initialize the audio engine first as the engine latency may be modified after initialization
            this.InitializeAudioEngine();

            // initialize the resampler buffers
            this.inputBufferSize = (int)(this.engineBufferInMs * this.mixFormat.AvgBytesPerSec / 1000);
            this.outputBufferSize = (int)(this.engineBufferInMs * outFormat.AvgBytesPerSec / 1000);

            DeviceUtil.CreateResamplerBuffer(this.inputBufferSize, out this.inputSample, out this.inputBuffer);
            DeviceUtil.CreateResamplerBuffer(this.outputBufferSize, out this.outputSample, out this.outputBuffer);

            // Create resampler object
            this.resampler = DeviceUtil.CreateResampler(this.mixFormat, outFormat);
        }
    }
    else
    {
        // initialize the audio engine with the default mix format
        this.InitializeAudioEngine();
    }

    // Set the callback function
    this.dataAvailableCallback = callback;
}
/// <summary>
/// Resamples an audio stream.
/// </summary>
/// <param name="source">A stream of audio to be resampled.</param>
/// <param name="outputFormat">The desired audio output format for the resampled stream.</param>
/// <returns>A stream of resampled audio.</returns>
public static IProducer<AudioBuffer> Resample(this IProducer<AudioBuffer> source, WaveFormat outputFormat)
{
    return Resample(source, new AudioResamplerConfiguration() { OutputFormat = outputFormat });
}
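A hedged usage sketch for the extension above; audioSource stands in for any hypothetical upstream IProducer<AudioBuffer>, for example the output of an audio capture component in whatever format the device produced.

// Resample an existing audio stream to 16 kHz, 1-channel, 16-bit PCM for downstream
// consumers (e.g. speech recognizers) that expect that fixed format.
var resampled = audioSource.Resample(WaveFormat.Create16kHz1Channel16BitPcm());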
/// <summary> /// Initializes a new instance of the <see cref="AudioCapture"/> class with a specified output format and device name. /// </summary> /// <param name="pipeline">The pipeline to add the component to.</param> /// <param name="outputFormat">The output format to use.</param> /// <param name="deviceName">The name of the audio device.</param> /// <param name="name">An optional name for the component.</param> public AudioCapture(Pipeline pipeline, WaveFormat outputFormat, string deviceName = null, string name = nameof(AudioCapture)) : this(pipeline, new AudioCaptureConfiguration() { Format = outputFormat, DeviceName = deviceName }, name) { }
/// <inheritdoc/>
public void Stop(DateTime finalOriginatingTime, Action notifyCompleted)
{
    notifyCompleted();
    this.sourceFormat = null;
}
/// <summary>
/// Returns the size in bytes required to create a marshaled unmanaged copy of the object.
/// </summary>
/// <param name="format">The object that is to be marshaled.</param>
/// <returns>The unmanaged size of the object.</returns>
public static int MarshalSizeOf(WaveFormat format)
{
    return Marshal.SizeOf<WaveFormat>() + format?.ExtraSize ?? 0;
}
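A small sketch of what the returned size represents, assuming MarshalSizeOf is a static member of WaveFormat alongside the MarshalToPtr/MarshalFromPtr calls used in Initialize() above; the allocation pattern is illustrative only.

var format = WaveFormat.Create16kHz1Channel16BitPcm();

// Fixed struct size plus the format-specific extra bytes appended after it.
int unmanagedSize = WaveFormat.MarshalSizeOf(format);

// A caller would allocate this many bytes before copying the structure to unmanaged memory.
IntPtr ptr = Marshal.AllocHGlobal(unmanagedSize);
try
{
    // ... copy the structure into unmanaged memory here (see WaveFormat.MarshalToPtr above) ...
}
finally
{
    Marshal.FreeHGlobal(ptr);
}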
/// <summary>
/// Starts rendering audio data.
/// </summary>
/// <param name="maxBufferSeconds">
/// The maximum duration of audio that can be buffered for playback.
/// </param>
/// <param name="targetLatencyInMs">
/// The target maximum number of milliseconds of acceptable lag between
/// playback of samples and live sound being produced.
/// </param>
/// <param name="gain">
/// The gain to be applied prior to rendering the audio.
/// </param>
/// <param name="inFormat">
/// The input audio format.
/// </param>
public void StartRendering(double maxBufferSeconds, int targetLatencyInMs, float gain, WaveFormat inFormat)
{
    if (this.wasapiRenderClient != null)
    {
        this.StopRendering();
    }

    // Create an audio buffer to buffer audio awaiting playback.
    this.audioBufferStream = new CircularBufferStream((long)Math.Ceiling(maxBufferSeconds * inFormat.AvgBytesPerSec), false);

    this.wasapiRenderClient = new WasapiRenderClient(this.audioDevice);

    // Create a callback delegate and marshal it to a function pointer. Keep a
    // reference to the delegate as a class field to prevent it from being GC'd.
    this.callbackDelegate = new AudioDataRequestedCallback(this.AudioDataRequestedCallback);

    // initialize the renderer with the desired parameters
    this.wasapiRenderClient.Initialize(targetLatencyInMs, gain, inFormat, this.callbackDelegate);

    // tell WASAPI to start rendering
    this.wasapiRenderClient.Start();
}
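A call-site sketch; only the parameter order comes from the signature above, while the audioRenderer instance name and the specific values are hypothetical.

// Illustrative: buffer up to 5 seconds of audio, target ~20 ms of latency, unity gain,
// 16 kHz, 1-channel, 16-bit PCM input.
var inFormat = WaveFormat.Create16kHz1Channel16BitPcm();
audioRenderer.StartRendering(5.0, 20, 1.0f, inFormat);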
/// <summary>
/// Converts a stream of audio data to a stream of floating point values.
/// </summary>
/// <param name="source">A stream containing the input audio data.</param>
/// <param name="format">The audio format of the input audio.</param>
/// <param name="deliveryPolicy">An optional delivery policy.</param>
/// <returns>A stream of floating point audio sample values.</returns>
public static IProducer<float[]> ToFloat(this IProducer<byte[]> source, WaveFormat format, DeliveryPolicy deliveryPolicy = null)
{
    return source.PipeTo(new ToFloat(source.Out.Pipeline, format), deliveryPolicy);
}
/// <summary>
/// Transforms a stream of byte arrays containing raw audio to an <see cref="AudioBuffer"/> stream.
/// </summary>
/// <param name="source">A stream of raw audio byte arrays.</param>
/// <param name="audioFormat">The audio format of the raw audio contained within the byte arrays.</param>
/// <param name="deliveryPolicy">An optional delivery policy.</param>
/// <returns>A stream of audio buffers.</returns>
public static IProducer<AudioBuffer> ToAudioBuffer(this IProducer<byte[]> source, WaveFormat audioFormat, DeliveryPolicy deliveryPolicy = null)
{
    return source.Select(x => new AudioBuffer(x, audioFormat), deliveryPolicy);
}
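A short sketch tying the two byte[] adapters above together; rawBytes is a hypothetical upstream IProducer<byte[]> of raw PCM data (for example, from a network source), and the format is assumed to be 16 kHz, 1-channel, 16-bit PCM.

var format = WaveFormat.Create16kHz1Channel16BitPcm();

// Wrap each byte array in an AudioBuffer so downstream audio components can consume it.
var audio = rawBytes.ToAudioBuffer(format);

// Or convert the same raw bytes to per-sample floating point values.
var samples = rawBytes.ToFloat(format);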
/// <summary>
/// Starts capturing audio data.
/// </summary>
/// <param name="targetLatencyInMs">
/// The target maximum number of milliseconds of acceptable lag between
/// live sound being produced and capture operation.
/// </param>
/// <param name="audioEngineBufferInMs">
/// The amount of audio that may be buffered by the audio engine between reads.
/// </param>
/// <param name="gain">
/// The gain to be applied to the captured audio.
/// </param>
/// <param name="outFormat">
/// The desired output format of the captured audio.
/// </param>
/// <param name="speech">
/// If true, optimizes the audio capture pipeline for speech recognition.
/// </param>
/// <param name="eventDrivenCapture">
/// If true, initialize Windows audio capture in event-driven mode. The audio capture engine will call
/// the <see cref="AudioDataAvailableCallback"/> handler as soon as data is available, at intervals
/// determined by the audio engine (which may be less than the <paramref name="targetLatencyInMs"/>).
/// This captures audio with the lowest possible latency while still allowing for buffering up to the
/// amount of time specified by <paramref name="targetLatencyInMs"/> (when for example the system is
/// under heavy load and the capture callback is unable to service audio packets at the rate at which
/// the audio engine returns captured audio packets).
/// </param>
public void StartCapture(int targetLatencyInMs, int audioEngineBufferInMs, float gain, WaveFormat outFormat, bool speech, bool eventDrivenCapture)
{
    if (this.wasapiCaptureClient != null)
    {
        this.StopCapture();
    }

    this.wasapiCaptureClient = new WasapiCaptureClient(this.audioDevice, eventDrivenCapture);

    // Create a callback delegate and marshal it to a function pointer. Keep a
    // reference to the delegate as a class field to prevent it from being GC'd.
    this.callbackDelegate = new AudioDataAvailableCallback(this.AudioDataAvailableCallback);

    // initialize the capture with the desired parameters
    this.wasapiCaptureClient.Initialize(targetLatencyInMs, audioEngineBufferInMs, gain, outFormat, this.callbackDelegate, speech);

    // tell WASAPI to start capturing
    this.wasapiCaptureClient.Start();
}
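A call-site sketch; the audioCapturer instance name and the specific values are hypothetical, and only the parameter order is taken from the signature above.

// Illustrative: ~20 ms target latency, 500 ms engine buffer, unity gain,
// 16 kHz, 1-channel, 16-bit PCM output, speech-optimized, event-driven capture.
var outFormat = WaveFormat.Create16kHz1Channel16BitPcm();
audioCapturer.StartCapture(20, 500, 1.0f, outFormat, speech: true, eventDrivenCapture: true);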