/// <summary>
/// Cleans up the microphone resources and the stream and unregisters from MediaCapture events
/// </summary>
/// <returns>true if successful</returns>
public async System.Threading.Tasks.Task<bool> CleanupRecording()
{
    if (isRecordingInitialized)
    {
        // A recording still in progress is stopped first so its content is saved.
        if (isRecording)
        {
            await StopRecording();
        }
        isRecordingInitialized = false;
    }

    if (mediaCapture != null)
    {
        // Unhook the capture events before disposing so no callback fires on a dead object.
        mediaCapture.RecordLimitationExceeded -= mediaCapture_RecordLimitationExceeded;
        mediaCapture.Failed -= mediaCapture_Failed;
        mediaCapture.Dispose();
        mediaCapture = null;
    }

    if (STTStream != null)
    {
        STTStream.BufferReady -= STTStream_BufferReady;
        STTStream.AudioLevel -= STTStream_AudioLevel;
        STTStream.Dispose();
        STTStream = null;
    }

    return true;
}
/// <summary>
/// Factory method for <see cref="SpeechToTextMainStream"/>.
/// Returns a new instance, or null when construction throws (the exception is logged).
/// </summary>
/// <param name="maxSizeInBytes">Maximum in-memory buffer size; 0 means unlimited.</param>
/// <param name="thresholdDuration">Duration in milliseconds used to average the audio level.</param>
/// <param name="thresholdLevel">Minimum average audio level that triggers recording.</param>
/// <returns>The new stream, or null on failure.</returns>
public static SpeechToTextMainStream Create(ulong maxSizeInBytes = 0, uint thresholdDuration = 0, uint thresholdLevel = 0)
{
    try
    {
        return new SpeechToTextMainStream(maxSizeInBytes, thresholdDuration, thresholdLevel);
    }
    catch (Exception ex)
    {
        // Best-effort factory: callers are expected to null-check the result.
        System.Diagnostics.Debug.WriteLine("Exception while creating SpeechToTextMainStream: " + ex.Message);
        return null;
    }
}
/// <summary>
/// StartContinuousRecording method
/// Start to record audio using the microphone.
/// The audio stream is stored in memory.
/// </summary>
/// <param name="MaxStreamSizeInBytes">
/// This parameter defines the max size of the buffer in memory. When the size of the buffer is over this limit, the
/// client creates another stream and removes the previous stream.
/// By default the value is 0, in that case the audio stream is stored in memory with no limit of size.
/// </param>
/// <param name="ThresholdDuration">
/// The duration in milliseconds for the calculation of the average audio level.
/// With this parameter you define the period during which the average level is measured.
/// If the value is 0, no buffer will be sent to Cognitive Services.
/// </param>
/// <param name="ThresholdLevel">
/// The minimum audio level average necessary to trigger the recording,
/// it's a value between 0 and 65535. You can tune this value after several microphone tests.
/// If the value is 0, no buffer will be sent to Cognitive Services.
/// </param>
/// <returns>true if successful</returns>
public async System.Threading.Tasks.Task<bool> StartContinuousRecording(ulong MaxStreamSizeInBytes, UInt16 ThresholdDuration, UInt16 ThresholdLevel)
{
    thresholdDuration = ThresholdDuration;
    thresholdLevel = ThresholdLevel;
    maxStreamSizeInBytes = MaxStreamSizeInBytes;
    bool bResult = false;

    if (!isRecordingInitialized)
    {
        await InitializeRecording();
    }

    // Tear down any previous stream before creating a fresh one.
    if (STTStream != null)
    {
        STTStream.BufferReady -= STTStream_BufferReady;
        STTStream.AudioLevel -= STTStream_AudioLevel;
        STTStream.Dispose();
        STTStream = null;
    }

    STTStream = SpeechToTextMainStream.Create(maxStreamSizeInBytes, thresholdDuration, thresholdLevel);
    // BUG FIX: the original subscribed to STTStream.AudioLevel/BufferReady BEFORE the
    // null check below, throwing NullReferenceException when Create() fails and returns null.
    if ((STTStream != null) && isRecordingInitialized)
    {
        STTStream.AudioLevel += STTStream_AudioLevel;
        STTStream.BufferReady += STTStream_BufferReady;
        try
        {
            Windows.Media.MediaProperties.MediaEncodingProfile MEP = Windows.Media.MediaProperties.MediaEncodingProfile.CreateWav(Windows.Media.MediaProperties.AudioEncodingQuality.Auto);
            if (MEP != null)
            {
                if (MEP.Audio != null)
                {
                    // Force 16 kHz, 16-bit, mono PCM (32000 bytes/s) on the WAV profile.
                    uint framerate = 16000;
                    uint bitsPerSample = 16;
                    uint numChannels = 1;
                    uint bytespersecond = 32000;
                    MEP.Audio.Properties[WAVAttributes.MF_MT_AUDIO_SAMPLES_PER_SECOND] = framerate;
                    MEP.Audio.Properties[WAVAttributes.MF_MT_AUDIO_NUM_CHANNELS] = numChannels;
                    MEP.Audio.Properties[WAVAttributes.MF_MT_AUDIO_BITS_PER_SAMPLE] = bitsPerSample;
                    MEP.Audio.Properties[WAVAttributes.MF_MT_AUDIO_AVG_BYTES_PER_SECOND] = bytespersecond;
                    // Trace the resulting profile and read back the effective audio format.
                    foreach (var Property in MEP.Audio.Properties)
                    {
                        System.Diagnostics.Debug.WriteLine("Property: " + Property.Key.ToString());
                        System.Diagnostics.Debug.WriteLine("Value: " + Property.Value.ToString());
                        if (Property.Key == new Guid("5faeeae7-0290-4c31-9e8a-c534f68d9dba"))
                        {
                            framerate = (uint)Property.Value;
                        }
                        if (Property.Key == new Guid("f2deb57f-40fa-4764-aa33-ed4f2d1ff669"))
                        {
                            bitsPerSample = (uint)Property.Value;
                        }
                        if (Property.Key == new Guid("37e48bf5-645e-4c5b-89de-ada9e29b696a"))
                        {
                            numChannels = (uint)Property.Value;
                        }
                    }
                }
                if (MEP.Container != null)
                {
                    foreach (var Property in MEP.Container.Properties)
                    {
                        System.Diagnostics.Debug.WriteLine("Property: " + Property.Key.ToString());
                        System.Diagnostics.Debug.WriteLine("Value: " + Property.Value.ToString());
                    }
                }
            }
            await mediaCapture.StartRecordToStreamAsync(MEP, STTStream);
            bResult = true;
            isRecording = true;
            System.Diagnostics.Debug.WriteLine("Recording in audio stream...");
        }
        catch (Exception e)
        {
            System.Diagnostics.Debug.WriteLine("Exception while recording in audio stream:" + e.Message);
        }
    }
    return bResult;
}
/// <summary>
/// SendStorageFile method
/// Sends the content of an audio file to the SpeechToText service.
/// </summary>
/// <param name="wavFile">StorageFile associated with the audio file which
/// will be sent to the SpeechToText Services.
/// </param>
/// <param name="locale">language associated with the current buffer/recording,
/// for instance en-US, fr-FR, pt-BR, ...
/// </param>
/// <returns>The result of the SpeechToText REST API, or null on failure.</returns>
public async System.Threading.Tasks.Task<SpeechToTextResponse> SendStorageFile(Windows.Storage.StorageFile wavFile, string locale)
{
    SpeechToTextResponse r = null;
    // One attempt by default; a successful token renewal after a 403 grants one retry.
    int loop = 1;
    while (loop-- > 0)
    {
        try
        {
            string os = "Windows" + SpeechToText.SystemInformation.SystemVersion;
            string deviceid = "b2c95ede-97eb-4c88-81e4-80f32d6aee54";
            string speechUrl = SpeechUrl + "?scenarios=ulm&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5&version=3.0&device.os=" + os + "&locale=" + locale + "&format=json&requestid=" + Guid.NewGuid().ToString() + "&instanceid=" + deviceid + "&result.profanitymarkup=1&maxnbest=3";
            // BUG FIX: the original leaked the HttpClient; dispose it per attempt.
            using (Windows.Web.Http.HttpClient hc = new Windows.Web.Http.HttpClient())
            {
                hc.DefaultRequestHeaders.TryAppendWithoutValidation("Authorization", Token);
                Windows.Web.Http.HttpResponseMessage hrm = null;
                Windows.Storage.StorageFile file = wavFile;
                if (file != null)
                {
                    using (var fileStream = await file.OpenAsync(Windows.Storage.FileAccessMode.Read))
                    {
                        if (STTStream != null)
                        {
                            STTStream.AudioLevel -= STTStream_AudioLevel;
                            STTStream.Dispose();
                            STTStream = null;
                        }
                        STTStream = SpeechToTextMainStream.Create();
                        if (STTStream != null)
                        {
                            byte[] byteArray = new byte[fileStream.Size];
                            // BUG FIX: the original blocked with .AsTask().Wait() inside an
                            // async method (deadlock risk on the UI sync context); await instead.
                            await fileStream.ReadAsync(byteArray.AsBuffer(), (uint)fileStream.Size, Windows.Storage.Streams.InputStreamOptions.Partial);
                            await STTStream.WriteAsync(byteArray.AsBuffer());
                            Windows.Web.Http.HttpStreamContent content = new Windows.Web.Http.HttpStreamContent(STTStream.AsStream().AsInputStream());
                            // BUG FIX: the original appended a bogus "ContentType" request header;
                            // the real Content-Type header belongs on the request content.
                            content.Headers.TryAppendWithoutValidation("Content-Type", "audio/wav; codec=\"audio/pcm\"; samplerate=16000");
                            content.Headers.ContentLength = STTStream.GetLength();
                            System.Diagnostics.Debug.WriteLine("REST API Post Content Length: " + content.Headers.ContentLength.ToString() + " bytes");
                            System.Threading.CancellationTokenSource cts = new System.Threading.CancellationTokenSource();
                            IProgress<Windows.Web.Http.HttpProgress> progress = new Progress<Windows.Web.Http.HttpProgress>(ProgressHandler);
                            hrm = await hc.PostAsync(new Uri(speechUrl), content).AsTask(cts.Token, progress);
                        }
                    }
                }
                if (hrm != null)
                {
                    switch (hrm.StatusCode)
                    {
                        case Windows.Web.Http.HttpStatusCode.Ok:
                            var b = await hrm.Content.ReadAsBufferAsync();
                            string result = System.Text.UTF8Encoding.UTF8.GetString(b.ToArray());
                            if (!string.IsNullOrEmpty(result))
                            {
                                r = new SpeechToTextResponse(result);
                            }
                            break;
                        case Windows.Web.Http.HttpStatusCode.Forbidden:
                            // BUG FIX: the original retried only when RenewToken() returned an
                            // EMPTY token (renewal failed), so the retry was doomed. Retry only
                            // when a fresh token was actually obtained.
                            string token = await RenewToken();
                            if (!string.IsNullOrEmpty(token))
                            {
                                loop++;
                            }
                            break;
                        default:
                            int code = (int)hrm.StatusCode;
                            string HttpError = "Http Response Error: " + code.ToString() + " reason: " + hrm.ReasonPhrase.ToString();
                            System.Diagnostics.Debug.WriteLine(HttpError);
                            r = new SpeechToTextResponse(string.Empty, HttpError);
                            break;
                    }
                }
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Debug.WriteLine("Exception while sending the audio file:" + ex.Message);
        }
    }
    return r;
}