Example #1
 public GoogleSpeak(Pipeline pipeline, Microsoft.Psi.Audio.WaveFormat format, string languageCode) : base(pipeline)
 {
     this.gClient          = TextToSpeechClient.Create();
     this.format           = format;
     this.TextLanguageCode = languageCode;

     // Only 16 kHz, 1-channel, 16-bit PCM maps to the Linear16 encoding; any other format leaves
     // googleAudioFormat at its default value.
     if (format.Equals(Microsoft.Psi.Audio.WaveFormat.Create16kHz1Channel16BitPcm()))
     {
         this.googleAudioFormat = AudioEncoding.Linear16;
     }
 }
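A minimal construction sketch for the component above; the language code and audio format values are assumptions for illustration, and Pipeline.Create() comes from Microsoft.Psi:

 // Hypothetical usage: build a pipeline and instantiate the text-to-speech component with 16 kHz PCM output.
 var pipeline = Pipeline.Create();
 var tts = new GoogleSpeak(pipeline, Microsoft.Psi.Audio.WaveFormat.Create16kHz1Channel16BitPcm(), "en-US");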
Example #2
 /// <summary>
 /// Initializes a new instance of the <see cref="AudioBuffer"/> structure of a pre-allocated fixed length containing
 /// no data initially. This overload is used primarily for making a copy of an existing <see cref="AudioBuffer"/>.
 /// </summary>
 /// <param name="length">The size in bytes of the audio data.</param>
 /// <param name="format">The audio format.</param>
 public AudioBuffer(int length, WaveFormat format)
     : this(new byte[length], format)
 {
 }
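A sketch of the copy pattern mentioned in the remarks, assuming the AudioBuffer type exposes Data, Format, and Length members as it does elsewhere in the Psi audio sources:

 // Allocate an empty buffer of the same size and format, then copy the audio bytes into it (illustrative).
 var copy = new AudioBuffer(original.Length, original.Format);
 Array.Copy(original.Data, copy.Data, original.Length);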
Example #3
 /// <summary>
 /// Initializes a new instance of the <see cref="AudioCapture"/> class with a specified output format and device name.
 /// </summary>
 /// <param name="pipeline">The pipeline to add the component to.</param>
 /// <param name="outputFormat">The output format to use.</param>
 /// <param name="deviceName">The name of the audio device.</param>
 public AudioCapture(Pipeline pipeline, WaveFormat outputFormat, string deviceName = "plughw:0,0")
     : this(pipeline, new AudioCaptureConfiguration() { Format = outputFormat, DeviceName = deviceName })
 {
 }
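A minimal capture sketch built on this overload, assuming the Microsoft.Psi and Microsoft.Psi.Audio namespaces are imported; the device name and output format are illustrative:

 // Capture 16 kHz, 1-channel, 16-bit PCM from an ALSA device and print the size of each buffer.
 using (var pipeline = Pipeline.Create())
 {
     var audio = new AudioCapture(pipeline, WaveFormat.Create16kHz1Channel16BitPcm(), "plughw:0,0");
     audio.Do(buffer => Console.WriteLine($"Captured {buffer.Length} bytes"));
     pipeline.Run();
 }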
Example #4
 /// <summary>
 /// Called when the pipeline is shutting down.
 /// </summary>
 private void OnPipelineStop()
 {
     this.sourceFormat = null;
 }
Example #5
 /// <summary>
 /// Initializes a new instance of the <see cref="AudioBuffer"/> structure.
 /// </summary>
 /// <param name="data">An array of bytes containing the audio data.</param>
 /// <param name="format">The audio format.</param>
 public AudioBuffer(byte[] data, WaveFormat format)
 {
     this.data   = data;
     this.format = format;
 }
Example #6
        /// <summary>
        /// Initialize the capturer.
        /// </summary>
        /// <param name="engineLatency">
        /// Number of milliseconds of acceptable lag between live sound being produced and recording operation.
        /// </param>
        /// <param name="engineBuffer">
        /// Number of milliseconds of audio that may be buffered between reads.
        /// </param>
        /// <param name="gain">
        /// The gain to be applied to the audio after capture.
        /// </param>
        /// <param name="outFormat">
        /// The format of the audio to be captured. If this is NULL, the default audio format of the
        /// capture device will be used.
        /// </param>
        /// <param name="callback">
        /// Callback function delegate which will handle the captured data.
        /// </param>
        /// <param name="speech">
        /// If true, sets the audio category to speech to optimize audio pipeline for speech recognition.
        /// </param>
        public void Initialize(int engineLatency, int engineBuffer, float gain, WaveFormat outFormat, AudioDataAvailableCallback callback, bool speech)
        {
            // Create our shutdown event - we want a manual reset event that starts in the not-signaled state.
            this.shutdownEvent = new ManualResetEvent(false);

            // Now activate an IAudioClient object on our preferred endpoint and retrieve the mix format for that endpoint.
            object obj = this.endpoint.Activate(ref audioClientIID, ClsCtx.INPROC_SERVER, IntPtr.Zero);

            this.audioClient = (IAudioClient)obj;

            // The following block enables advanced mic array APO pipeline on Windows 10 RS2 builds >= 15004.
            // This must be called before the call to GetMixFormat() in LoadFormat().
            if (speech)
            {
                // Use a soft cast: if the endpoint does not support IAudioClient2, fall through to the warning below.
                IAudioClient2 audioClient2 = this.audioClient as IAudioClient2;
                if (audioClient2 != null)
                {
                    AudioClientProperties properties = new AudioClientProperties
                    {
                        Size     = Marshal.SizeOf<AudioClientProperties>(),
                        Category = AudioStreamCategory.Speech,
                    };

                    int hr = audioClient2.SetClientProperties(ref properties);
                    if (hr != 0)
                    {
                        Console.WriteLine("Failed to set audio stream category to AudioCategory_Speech: {0}", hr);
                    }
                }
                else
                {
                    Console.WriteLine("Unable to get IAudioClient2 interface");
                }
            }

            // Load the MixFormat. This may differ depending on the shared mode used.
            this.LoadFormat();

            // Remember our configured latency and buffer size
            this.engineLatencyInMs = engineLatency;
            this.engineBufferInMs  = engineBuffer;

            // Set the gain
            this.gain = gain;

            // Determine whether or not we need a resampler
            this.resampler = null;

            if (outFormat != null)
            {
                // Check if the desired format is supported
                IntPtr closestMatchPtr;
                IntPtr outFormatPtr = WaveFormat.MarshalToPtr(outFormat);
                int    hr           = this.audioClient.IsFormatSupported(AudioClientShareMode.Shared, outFormatPtr, out closestMatchPtr);

                // Free outFormatPtr to prevent leaking memory
                Marshal.FreeHGlobal(outFormatPtr);

                if (hr == 0)
                {
                    // hr == 0 (S_OK): the requested format is supported natively. Use outFormat as the
                    // mix format and initialize the audio capture client with it; no resampling is needed.
                    this.mixFormat    = outFormat;
                    this.mixFrameSize = (this.mixFormat.BitsPerSample / 8) * this.mixFormat.Channels;

                    this.InitializeAudioEngine();
                }
                else
                {
                    // In all other cases, we need to resample to OutFormat
                    if ((hr == 1) && (closestMatchPtr != IntPtr.Zero))
                    {
                        // Use closest match suggested by IsFormatSupported() and resample
                        this.mixFormat    = WaveFormat.MarshalFromPtr(closestMatchPtr);
                        this.mixFrameSize = (this.mixFormat.BitsPerSample / 8) * this.mixFormat.Channels;

                        // Free closestMatchPtr to prevent leaking memory
                        Marshal.FreeCoTaskMem(closestMatchPtr);
                    }

                    // initialize the audio engine first as the engine latency may be modified after initialization
                    this.InitializeAudioEngine();

                    // initialize the resampler buffers
                    this.inputBufferSize  = (int)(this.engineBufferInMs * this.mixFormat.AvgBytesPerSec / 1000);
                    this.outputBufferSize = (int)(this.engineBufferInMs * outFormat.AvgBytesPerSec / 1000);

                    DeviceUtil.CreateResamplerBuffer(this.inputBufferSize, out this.inputSample, out this.inputBuffer);
                    DeviceUtil.CreateResamplerBuffer(this.outputBufferSize, out this.outputSample, out this.outputBuffer);

                    // Create resampler object
                    this.resampler = DeviceUtil.CreateResampler(this.mixFormat, outFormat);
                }
            }
            else
            {
                // initialize the audio engine with the default mix format
                this.InitializeAudioEngine();
            }

            // Set the callback function
            this.dataAvailableCallback = callback;
        }
Example #7
 /// <summary>
 /// Resamples an audio stream.
 /// </summary>
 /// <param name="source">A stream audio to be resampled.</param>
 /// <param name="outputFormat">The desired audio output format for the resampled stream.</param>
 /// <returns>A stream of resampled audio.</returns>
 public static IProducer<AudioBuffer> Resample(this IProducer<AudioBuffer> source, WaveFormat outputFormat)
 {
     return Resample(source, new AudioResamplerConfiguration() { OutputFormat = outputFormat });
 }
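A hedged usage sketch, assuming audio is an IProducer<AudioBuffer> stream such as the output of the AudioCapture component in Example #3:

 // Resample a captured stream to 16 kHz mono PCM, e.g. before feeding a speech recognizer (illustrative).
 var resampled = audio.Resample(WaveFormat.Create16kHz1Channel16BitPcm());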
Example #8
 /// <summary>
 /// Initializes a new instance of the <see cref="AudioCapture"/> class with a specified output format and device name.
 /// </summary>
 /// <param name="pipeline">The pipeline to add the component to.</param>
 /// <param name="outputFormat">The output format to use.</param>
 /// <param name="deviceName">The name of the audio device.</param>
 /// <param name="name">An optional name for the component.</param>
 public AudioCapture(Pipeline pipeline, WaveFormat outputFormat, string deviceName = null, string name = nameof(AudioCapture))
     : this(pipeline, new AudioCaptureConfiguration() { Format = outputFormat, DeviceName = deviceName }, name)
 {
 }
Example #9
 /// <inheritdoc/>
 public void Stop(DateTime finalOriginatingTime, Action notifyCompleted)
 {
     notifyCompleted();
     this.sourceFormat = null;
 }
Example #10
 /// <summary>
 /// Returns the size in bytes required to create a marshaled unmanaged copy of the object.
 /// </summary>
 /// <param name="format">The object that is to be marshaled.</param>
 /// <returns>The unmanaged size of the object.</returns>
 public static int MarshalSizeOf(WaveFormat format)
 {
     return Marshal.SizeOf<WaveFormat>() + format?.ExtraSize ?? 0;
 }
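A small sketch of how the computed size might be used, assuming MarshalSizeOf lives on WaveFormat alongside the MarshalToPtr/MarshalFromPtr helpers seen in Example #6; the allocate-and-free pattern is an illustration, not required usage:

 // The returned size covers the fixed WaveFormat header plus any format-specific extra bytes.
 var format = WaveFormat.Create16kHz1Channel16BitPcm();
 int unmanagedSize = WaveFormat.MarshalSizeOf(format);
 IntPtr buffer = Marshal.AllocHGlobal(unmanagedSize);
 // ... marshal the structure into the buffer here ...
 Marshal.FreeHGlobal(buffer);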
Example #11
        /// <summary>
        /// Starts rendering audio data.
        /// </summary>
        /// <param name="maxBufferSeconds">
        /// The maximum duration of audio that can be buffered for playback.
        /// </param>
        /// <param name="targetLatencyInMs">
        /// The target maximum number of milliseconds of acceptable lag between
        /// playback of samples and live sound being produced.
        /// </param>
        /// <param name="gain">
        /// The gain to be applied prior to rendering the audio.
        /// </param>
        /// <param name="inFormat">
        /// The input audio format.
        /// </param>
        public void StartRendering(double maxBufferSeconds, int targetLatencyInMs, float gain, WaveFormat inFormat)
        {
            if (this.wasapiRenderClient != null)
            {
                this.StopRendering();
            }

            // Create an audio buffer to buffer audio awaiting playback.
            this.audioBufferStream = new CircularBufferStream((long)Math.Ceiling(maxBufferSeconds * inFormat.AvgBytesPerSec), false);

            this.wasapiRenderClient = new WasapiRenderClient(this.audioDevice);

            // Create a callback delegate and marshal it to a function pointer. Keep a
            // reference to the delegate as a class field to prevent it from being GC'd.
            this.callbackDelegate = new AudioDataRequestedCallback(this.AudioDataRequestedCallback);

            // initialize the renderer with the desired parameters
            this.wasapiRenderClient.Initialize(targetLatencyInMs, gain, inFormat, this.callbackDelegate);

            // tell WASAPI to start rendering
            this.wasapiRenderClient.Start();
        }
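A hypothetical call sketch; renderer stands for the class that contains StartRendering and all parameter values are illustrative:

 // Buffer up to 5 seconds of audio, target 20 ms of latency, apply unity gain, accept 16 kHz mono PCM input.
 renderer.StartRendering(5.0, 20, 1.0f, WaveFormat.Create16kHz1Channel16BitPcm());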
Example #12
 /// <summary>
 /// Converts a stream of audio data to a stream of floating point values.
 /// </summary>
 /// <param name="source">A stream containing the input audio data.</param>
 /// <param name="format">The audio format of the input audio.</param>
 /// <param name="deliveryPolicy">An optional delivery policy.</param>
 /// <returns>A stream of floating point audio sample values.</returns>
 public static IProducer<float[]> ToFloat(this IProducer<byte[]> source, WaveFormat format, DeliveryPolicy deliveryPolicy = null)
 {
     return source.PipeTo(new ToFloat(source.Out.Pipeline, format), deliveryPolicy);
 }
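A brief usage sketch, assuming rawAudio is a hypothetical IProducer<byte[]> stream carrying PCM frames in the stated format:

 // Convert raw PCM bytes to floating point samples, e.g. as input to feature extraction (illustrative).
 var samples = rawAudio.ToFloat(WaveFormat.Create16kHz1Channel16BitPcm());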
Example #13
 /// <summary>
 /// Transforms a stream of byte arrays containing raw audio to an <see cref="AudioBuffer"/> stream.
 /// </summary>
 /// <param name="source">A stream of raw audio byte arrays.</param>
 /// <param name="audioFormat">The audio format of the raw audio contained within the byte arrays.</param>
 /// <param name="deliveryPolicy">An optional delivery policy.</param>
 /// <returns>A stream of audio buffers.</returns>
 public static IProducer<AudioBuffer> ToAudioBuffer(this IProducer<byte[]> source, WaveFormat audioFormat, DeliveryPolicy deliveryPolicy = null)
 {
     return source.Select(x => new AudioBuffer(x, audioFormat), deliveryPolicy);
 }
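A companion sketch for the same hypothetical rawAudio stream; wrapping the byte arrays lets downstream audio components, such as the resampler in Example #7, consume them:

 // Wrap each byte array in an AudioBuffer that carries the stated wave format (illustrative).
 var audio = rawAudio.ToAudioBuffer(WaveFormat.Create16kHz1Channel16BitPcm());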
Example #14
        /// <summary>
        /// Starts capturing audio data.
        /// </summary>
        /// <param name="targetLatencyInMs">
        /// The target maximum number of milliseconds of acceptable lag between
        /// live sound being produced and capture operation.
        /// </param>
        /// <param name="audioEngineBufferInMs">
        /// The amount of audio that may be buffered by the audio engine between
        /// reads.
        /// </param>
        /// <param name="gain">
        /// The gain to be applied to the captured audio.
        /// </param>
        /// <param name="outFormat">
        /// The desired output format of the captured audio.
        /// </param>
        /// <param name="speech">
        /// If true, optimizes the audio capture pipeline for speech recognition.
        /// </param>
        /// <param name="eventDrivenCapture">
        /// If true, initialize Windows audio capture in event-driven mode. The audio capture engine will call
        /// the <see cref="AudioDataAvailableCallback"/> handler as soon as data is available, at intervals
        /// determined by the audio engine (which may be less than the <paramref name="targetLatencyInMs"/>).
        /// This captures audio with the lowest possible latency while still allowing for buffering up to the
        /// amount of time specified by <paramref name="targetLatencyInMs"/> (when for example the system is
        /// under heavy load and the capture callback is unable to service audio packets at the rate at which
        /// the audio engine returns captured audio packets).
        /// </param>
        public void StartCapture(int targetLatencyInMs, int audioEngineBufferInMs, float gain, WaveFormat outFormat, bool speech, bool eventDrivenCapture)
        {
            if (this.wasapiCaptureClient != null)
            {
                this.StopCapture();
            }

            this.wasapiCaptureClient = new WasapiCaptureClient(this.audioDevice, eventDrivenCapture);

            // Create a callback delegate and marshal it to a function pointer. Keep a
            // reference to the delegate as a class field to prevent it from being GC'd.
            this.callbackDelegate = new AudioDataAvailableCallback(this.AudioDataAvailableCallback);

            // initialize the capture with the desired parameters
            this.wasapiCaptureClient.Initialize(targetLatencyInMs, audioEngineBufferInMs, gain, outFormat, this.callbackDelegate, speech);

            // tell WASAPI to start capturing
            this.wasapiCaptureClient.Start();
        }
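A hypothetical call sketch; audioSource stands for the class that contains StartCapture and the parameter values are illustrative only:

 // 20 ms target latency, 500 ms engine buffer, unity gain, 16 kHz mono output,
 // speech-optimized stream category, event-driven capture.
 audioSource.StartCapture(20, 500, 1.0f, WaveFormat.Create16kHz1Channel16BitPcm(), true, true);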