private static async Task ConvertAudioToTextAsync(Stream audioBlob, Stream textBlob, SpeechConfig config)
{
    var completionSource = new TaskCompletionSource<int>();

    using (var audioInput = AudioConfig.FromStreamInput(new AudioStreamReader(audioBlob)))
    {
        using (var recognizer = new SpeechRecognizer(config, audioInput))
        {
            var streamWriter = new StreamWriter(textBlob);

            // Write only the recognized text, not the full result object.
            recognizer.Recognized += (s, e) => streamWriter.Write(e.Result.Text);
            recognizer.SessionStopped += (s, e) =>
            {
                streamWriter.Flush();
                streamWriter.Dispose();
                completionSource.TrySetResult(0);
            };

            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
            await completionSource.Task.ConfigureAwait(false);
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
}
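// The AudioStreamReader used above (like the BinaryAudioStreamReader in later snippets) is a
// pull-stream callback that is not shown here. A minimal sketch of such a callback, assuming it
// simply forwards reads to the wrapped Stream; the class name and members are illustrative, not
// the exact implementation referenced above.
using System.IO;
using Microsoft.CognitiveServices.Speech.Audio;

public sealed class AudioStreamReader : PullAudioInputStreamCallback
{
    private readonly Stream _source;

    public AudioStreamReader(Stream source) => _source = source;

    // Called by the Speech SDK whenever it needs more audio; returning 0 signals end of stream.
    public override int Read(byte[] dataBuffer, uint size)
    {
        return _source.Read(dataBuffer, 0, (int)size);
    }

    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            _source.Dispose();
        }

        base.Dispose(disposing);
    }
}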
private static async Task<string> UploadAudioAndStartRemoteTranscription(string key, string region)
{
    AudioStreamFormat audioStreamFormat;

    var config = SpeechConfig.FromSubscription(key, region);
    config.SetProperty("ConversationTranscriptionInRoomAndOnline", "true");
    config.SetServiceProperty("transcriptionMode", "RealTimeAndAsync", ServicePropertyChannel.UriQueryParameter);

    var waveFilePullStream = OpenWavFile(@"katiesteve.wav", out audioStreamFormat);
    var audioInput = AudioConfig.FromStreamInput(AudioInputStream.CreatePullStream(waveFilePullStream, audioStreamFormat));
    var meetingId = Guid.NewGuid().ToString();

    using (var conversation = await Conversation.CreateConversationAsync(config, meetingId))
    {
        using (var conversationTranscriber = TrackSessionId(new ConversationTranscriber(audioInput)))
        {
            await conversationTranscriber.JoinConversationAsync(conversation);
            await conversation.AddParticipantAsync("OneUserByUserId");

            var user = User.FromUserId("CreateUserFromId and then add it");
            await conversation.AddParticipantAsync(user);

            var result = await GetRecognizerResult(conversationTranscriber, meetingId);
        }
    }

    return meetingId;
}
private void SetupTranscriptionAndTranslationService()
{
    try
    {
        var lCognitiveKey = _settings.AzureCognitiveKey;
        var lCognitiveRegion = _settings.AzureCognitiveRegion;

        // Log only the first half of the key so the full secret never ends up in the event log.
        _eventPublisher.Publish("MySTT Setup", $"Got region: {lCognitiveRegion}, key starting with: {lCognitiveKey?.Substring(0, lCognitiveKey.Length / 2)}");

        this.mTransSpeechConfig = SpeechTranslationConfig.FromSubscription(lCognitiveKey, lCognitiveRegion);
        var fromLanguage = "en-US";
        var toLanguages = new List<string> { "el-GR" };
        //var toLanguages = new List<string> { "ru-RU" };
        this.mTransSpeechConfig.SpeechRecognitionLanguage = fromLanguage;
        toLanguages.ForEach(this.mTransSpeechConfig.AddTargetLanguage);

        this.mInputStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(SAMPLESPERSECOND, BITSPERSAMPLE, NUMBEROFCHANNELS));
        this.mAudioConfig = AudioConfig.FromStreamInput(this.mInputStream);
        this.mTranslationRecognizer = new TranslationRecognizer(this.mTransSpeechConfig, this.mAudioConfig);

        this.mTranslationRecognizer.Recognizing += this.MSpeechRecognizer_Recognizing;
        this.mTranslationRecognizer.Recognized += this.MSpeechRecognizer_Recognized;
        this.mTranslationRecognizer.SpeechEndDetected += this.MSpeechRecognizer_SpeechEndDetected;

        this.StartRecognisionIfNeeded();
    }
    catch (Exception ex)
    {
        _eventPublisher.Publish("MySTT Setup - Failed", $"Failed to initialize: {ex.Message}");
    }
}
public static AudioConfig OpenWavFile(Stream stream)
{
    BinaryReader reader = new BinaryReader(stream);
    AudioStreamFormat format = readWaveHeader(reader);
    return AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format);
}
public async void AudioStart()
{
    var audioStream = new VoiceAudioStream();
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
    var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);
    var speechConfig = SpeechConfig.FromSubscription(_config["SpeechApiKey"], _config["SpeechRegion"]);
    var speechClient = new SpeechRecognizer(speechConfig, audioConfig);

    var phraseList = PhraseListGrammar.FromRecognizer(speechClient);
    foreach (var phrase in phrases)
    {
        phraseList.AddPhrase(phrase);
    }

    speechClient.Recognized += _speechClient_Recognized;

    string sessionId = speechClient.Properties.GetProperty(PropertyId.Speech_SessionId);
    var conn = new ConnectionInfo()
    {
        SessionId = sessionId,
        AudioStream = audioStream,
        SpeechClient = speechClient,
    };
    _connections.Add(Context.ConnectionId, conn);

    await speechClient.StartContinuousRecognitionAsync();
    Debug.WriteLine("Audio start message.");
}
public async Task<string> DetectLanguage(byte[] audioBytes, string fileExtension, string locale1, string locale2)
{
    var wavBytes = ConvertToWaveBytes(audioBytes, fileExtension);
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { locale1, locale2 });
    var config = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
    var stopRecognition = new TaskCompletionSource<int>();
    var detected = new List<string>();

    using var pushStream = AudioInputStream.CreatePushStream();
    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    {
        using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput);

        pushStream.Write(wavBytes);
        pushStream.Close();

        recognizer.Recognized += (s, e) =>
        {
            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
            var detectedLanguage = autoDetectSourceLanguageResult.Language;
            detected.Add(detectedLanguage);
            if (detected.Count > UtteranceCount)
            {
                stopRecognition.TrySetResult(0);
            }
        };
        recognizer.SessionStopped += (s, e) =>
        {
            stopRecognition.TrySetResult(0);
        };

        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        var timeoutTask = Task.Factory.StartNew(
            async () => { await SetTimeOutForRecognition(stopRecognition).ConfigureAwait(false); },
            CancellationToken.None,
            TaskCreationOptions.None,
            TaskScheduler.Default);

        Task.WaitAny(new[] { stopRecognition.Task });

        // Continuous recognition was started above, so stop continuous (not keyword) recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }

    if (detected.Count == 0)
    {
        throw new TimeoutException("Did not get any language identification results back in time.");
    }

    var detectedByCount = detected.GroupBy(i => i);
    var mostFreq = detectedByCount.OrderBy(g => g.Count()).LastOrDefault().Key;

    if (string.IsNullOrEmpty(mostFreq) ||
        (!mostFreq.Equals(locale1, StringComparison.OrdinalIgnoreCase) && !mostFreq.Equals(locale2, StringComparison.OrdinalIgnoreCase)))
    {
        return locale1;
    }

    return mostFreq;
}
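// SetTimeOutForRecognition and ConvertToWaveBytes above are helpers that are not shown. A minimal
// sketch of the timeout helper, assuming it only completes the TaskCompletionSource after a fixed
// delay so language detection cannot wait forever; the 30-second value is an assumption.
private static async Task SetTimeOutForRecognition(TaskCompletionSource<int> stopRecognition)
{
    // Bound the time spent waiting for language-identification results.
    await Task.Delay(TimeSpan.FromSeconds(30)).ConfigureAwait(false);
    stopRecognition.TrySetResult(0);
}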
/// <summary>
/// Starts this instance.
/// </summary>
private async Task _start()
{
    await this._syncLock.WaitAsync().ConfigureAwait(false);

    if (!_isRunning)
    {
        _tokenSource = new CancellationTokenSource();
        _buffer = new BufferBlock<SerializableAudioMediaBuffer>(new DataflowBlockOptions { CancellationToken = this._tokenSource.Token });
        await Task.Factory.StartNew(this._process).ConfigureAwait(false);

        // Initialize speech recognizer.
        Debug.WriteLine("RecordingBot _start.");
        _audioStream = new VoiceAudioStream();

        var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
        var audioConfig = AudioConfig.FromStreamInput(_audioStream, audioFormat);
        var speechConfig = SpeechConfig.FromSubscription("03f0f0daa33448ba9f9bf799d2e14d2a", "westus2");

        _speechClient = new SpeechRecognizer(speechConfig, audioConfig);
        _speechClient.Recognized += _speechClient_Recognized;
        _speechClient.Recognizing += _speechClient_Recognizing;
        _speechClient.Canceled += _speechClient_Canceled;

        await _speechClient.StartContinuousRecognitionAsync();
        _isRunning = true;
    }

    this._syncLock.Release();
}
//private const string speechEndpoint = "https://YOUR_LOCATION.api.cognitive.microsoft.com/";

//public async Task<IActionResult> OnGetAsync()
//{
//    return Page();
//}

public async Task<IActionResult> OnPostAsync()
{
    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechLocation);
    speechConfig.SpeechRecognitionLanguage = "ja-JP";

    byte[] readBytes;
    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var reader = new BinaryReader(VoiceFile.OpenReadStream());
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);

    var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
    var result = await speechRecognizer.RecognizeOnceAsync();

    if (result.Reason == ResultReason.RecognizedSpeech)
    {
        Result = "結果:";
        RecognizedText = result.Text;
    }

    return Page();
}
/// <summary>
/// Returns speech-to-text results from the selected Opus audio file streamed from a blob container in Azure Storage.
/// </summary>
/// <param name="opusBlob">Name of the Opus file</param>
/// <param name="container">Azure blob container name</param>
/// <returns>List<Speech> containing the speech results</returns>
public async Task<List<Speech>> RunRecognitionAsync(string opusBlob, string container)
{
    SpeechResult = new List<Speech>();
    var blobService = new BlobService();
    var blobClient = await blobService.GetBlobFromContainerAsync(opusBlob, container);

    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using (var recognizer = new SpeechRecognizer(_speechConfig, _languagesToDetect, audioConfig))
    {
        recognizer.Recognizing += Recognizing;
        recognizer.Recognized += Recognized;
        recognizer.SessionStarted += SessionStarted;
        recognizer.SessionStopped += SessionStopped;
        recognizer.Canceled += SessionCanceled;

        await InjectStreamIntoRecognizerAsync(audioInputStream, blobClient);

        await recognizer.StartContinuousRecognitionAsync();
        Task.WaitAny(new[] { _stopRecognition.Task });
        await recognizer.StopContinuousRecognitionAsync();
    }

    return SpeechResult;
}
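// InjectStreamIntoRecognizerAsync above is not shown. A minimal sketch, assuming blobClient is an
// Azure.Storage.Blobs BlobClient and that the downloaded audio is already in a format the default
// push stream accepts; it copies the blob into the push stream and closes it so the recognizer
// eventually sees end-of-stream.
private static async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobClient)
{
    using var blobStream = await blobClient.OpenReadAsync();
    var buffer = new byte[4096];
    int bytesRead;
    while ((bytesRead = await blobStream.ReadAsync(buffer, 0, buffer.Length)) > 0)
    {
        // Write only the bytes actually read from the blob.
        audioInputStream.Write(buffer, bytesRead);
    }

    // Signal that no more audio will arrive.
    audioInputStream.Close();
}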
/// <summary>
/// Remote audio transcription of the given audioFile with CognitiveServices
/// </summary>
public static AnalysisResult TranscribeAudio(ref AnalysisResult audioResponse, IFormFile audioFile)
{
    // needed for speaker diarization to resolve at the word level
    SPEECH_CONFIG.RequestWordLevelTimestamps();

    var audioFormat128 = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);
    var audioFormat256 = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);

    // load bytestream -> audio stream
    // load audio config from audio stream
    // initialize speech recognizer
    using (var br = new BinaryReader(audioFile.OpenReadStream()))
    using (var audioInputStream = AudioInputStream.CreatePushStream(audioFormat128))
    using (var audioConfig = AudioConfig.FromStreamInput(audioInputStream))
    using (var recognizer = new SpeechRecognizer(SPEECH_CONFIG, audioConfig))
    {
        long nbytes = audioFile.Length;
        var buff = new List<byte>();

        // read through bytes of audio
        byte[] readBytes;
        do
        {
            readBytes = br.ReadBytes(1024);
            buff.AddRange(readBytes);
            audioInputStream.Write(readBytes, readBytes.Length);
        } while (readBytes.Length > 0);

        var transcript = ExecuteRecognizer(recognizer).Result;
        audioResponse.Transcript = transcript;
        return audioResponse;
    }
}
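// ExecuteRecognizer above is not shown. A minimal sketch of what such a helper might look like,
// assuming it runs continuous recognition until the session stops or is canceled and concatenates
// the recognized text; this is illustrative, not the original implementation.
private static async Task<string> ExecuteRecognizer(SpeechRecognizer recognizer)
{
    var transcript = new StringBuilder();
    var sessionDone = new TaskCompletionSource<int>();

    recognizer.Recognized += (s, e) =>
    {
        if (e.Result.Reason == ResultReason.RecognizedSpeech)
        {
            transcript.Append(e.Result.Text).Append(' ');
        }
    };
    recognizer.Canceled += (s, e) => sessionDone.TrySetResult(0);
    recognizer.SessionStopped += (s, e) => sessionDone.TrySetResult(0);

    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
    await sessionDone.Task.ConfigureAwait(false);
    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

    return transcript.ToString();
}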
public static async Task<string> RecognizeSpeechFromUrlAsync(string url, string locale)
{
    byte[] audioData = null;
    using (var wc = new System.Net.WebClient())
    {
        audioData = wc.DownloadData(url);
    }

    var stream = new MemoryStream(audioData);
    var speechApiKey = Environment.GetEnvironmentVariable("SpeechApiKey");
    var speechApiRegion = Environment.GetEnvironmentVariable("SpeechApiRegion");

    var speechConfig = SpeechConfig.FromSubscription(speechApiKey, speechApiRegion);
    speechConfig.SpeechRecognitionLanguage = locale;

    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
    var audioStream = new VoiceAudioStream(stream);
    var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);
    var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

    var result = await recognizer.RecognizeOnceAsync();
    return result.Text;
}
/// <summary>
/// Recognize speech from an audio stream.
///
/// Only specific audio formats are currently supported; for details see:
/// https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/how-to-use-audio-input-streams
/// </summary>
/// <param name="stream"></param>
/// <returns></returns>
public static async Task FormStream(Stream stream)
{
    using (var audioConfig = AudioConfig.FromStreamInput(new ReadPCMStream(stream), AudioStreamFormat)) // read from the file
    {
        await SpeechRecognizer(audioConfig);
    }
}
/// <summary>
/// This method opens a WAV file.
/// </summary>
/// <param name="filename">The WAV file to read the audio data from.</param>
public static AudioConfig OpenWavFile(string filename)
{
    AudioStreamFormat format = null;
    var callback = OpenWavFileStream(filename, out format);
    return AudioConfig.FromStreamInput(callback, format);
}
public static AudioConfig OpenWavFile(BinaryReader reader, AudioProcessingOptions audioProcessingOptions = null)
{
    AudioStreamFormat format = readWaveHeader(reader);
    return (audioProcessingOptions == null)
        ? AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format)
        : AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format, audioProcessingOptions);
}
public async void RegisterAttendeeAsync(string name, string myLanguage, string preferredLanguage)
{
    Debug.WriteLine($"User {name}, Language: {myLanguage}, Connection {Context.ConnectionId} starting audio.");

    var config = _config.GetSection("SpeechAPI").Get<AppSettings>();
    bool exists = await InitializeAttendeeInfo(name, myLanguage, preferredLanguage);

    var audioStream = new VoiceAudioStream();
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
    var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);

    var speechKey = config.SubscriptionKey;
    var speechRegion = config.Region;
    var url = config.EndpointUri;
    Debug.WriteLine($"Key:{speechKey} | Region:{speechRegion}");

    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
    speechConfig.SpeechRecognitionLanguage = preferredLanguage;
    speechConfig.OutputFormat = OutputFormat.Simple;

    var speechClient = new SpeechRecognizer(speechConfig, audioConfig);
    speechClient.Recognized += _speechClient_Recognized;
    speechClient.Recognizing += _speechClient_Recognizing;
    speechClient.Canceled += _speechClient_Canceled;
    speechClient.SessionStarted += _speechClient_SessionStarted;

    string sessionId = speechClient.Properties.GetProperty(PropertyId.Speech_SessionId);

    // Maintains only one API connection per language
    SpeechAPIConnection conn = null;
    if (_connections.ContainsKey(preferredLanguage))
    {
        conn = _connections[preferredLanguage];
        conn.SessionId = sessionId;
    }
    else
    {
        conn = new SpeechAPIConnection()
        {
            SessionId = sessionId,
            AudioStream = audioStream,
            Recognizer = speechClient,
            Language = preferredLanguage
        };
        _connections[preferredLanguage] = conn;
    }

    Debug.WriteLine($"Connection for {preferredLanguage} added | SessionId:{sessionId}");

    await SendToAttendeeAsync(_attendeeInfo.GetAttendeeByConnectionID(Context.ConnectionId), $"Welcome:{name}");
    await speechClient.StartContinuousRecognitionAsync();
    Debug.WriteLine("Audio start message.");
}
/// <summary>
/// Initializes a new instance of the <see cref="ContinuousSpeechRecognizer"/> class.
/// </summary>
/// <param name="pipeline">The pipeline in which to create the component.</param>
/// <param name="subscriptionKey">The subscription key for the Azure speech resource.</param>
/// <param name="region">The service region of the Azure speech resource.</param>
public ContinuousSpeechRecognizer(Pipeline pipeline, string subscriptionKey, string region)
    : base(pipeline)
{
    var config = SpeechConfig.FromSubscription(subscriptionKey, region);
    this.pushStream = AudioInputStream.CreatePushStream();
    this.audioInput = AudioConfig.FromStreamInput(this.pushStream);
    this.recognizer = new SpeechRecognizer(config, this.audioInput);
}
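// The constructor above only wires up the recognizer; audio still has to be written into
// this.pushStream by the component. A hedged sketch of that plumbing, assuming raw 16 kHz,
// 16-bit mono PCM buffers arrive from the pipeline (the method names here are illustrative,
// not the component's actual receiver signature):
private void OnAudioFrame(byte[] pcmBuffer, int bytesRecorded)
{
    // PushAudioInputStream copies the data, so the caller may reuse pcmBuffer afterwards.
    this.pushStream.Write(pcmBuffer, bytesRecorded);
}

private void OnAudioSourceFinished()
{
    // Closing the stream lets the recognizer end the session once the remaining audio is processed.
    this.pushStream.Close();
}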
public TranslationEngine(IConfiguration config, IHubContext<TranslationHub> hub)
{
    _hub = hub;
    _config = config;

    _translationConfig = SpeechTranslationConfig.FromSubscription(_config["SUBSCRIPTION_KEY"], _config["REGION"]);
    _speechConfig = SpeechTranslationConfig.FromSubscription(_config["SUBSCRIPTION_KEY"], _config["REGION"]);

    _audioInput = AudioConfig.FromStreamInput(_inputStream);
    _audioOutputStream = AudioOutputStream.CreatePullStream();
    _output = AudioConfig.FromStreamOutput(_audioOutputStream);
}
/// <summary>
/// Constructs an <see cref="AudioConfig"/> from <see cref="Config"/>.
/// Depending on the available services, this may either use the audio features built into the Speech SDK (such as <see cref="AudioConfig.FromDefaultMicrophoneInput"/>),
/// or it may construct a <see cref="IStreamAudioSource"/> that accesses the requested <see cref="AudioDevice"/> with resampling and noise gates as required.
/// </summary>
/// <returns></returns>
protected AudioConfig GetAudioConfig()
{
    var streamSource = GetStreamAudioSource(Config.AudioSource);
    if (streamSource != null)
    {
        // use this stream source and convert to an Azure audio stream
        try
        {
            var azureInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(
                (uint)streamSource.Format.SampleRate,
                (byte)streamSource.Format.BitsPerSample,
                (byte)streamSource.Format.ChannelCount));

            byte[] bufferOptional = null;
            streamSource.DataAvailable += (s, e) =>
            {
                azureInput.Write(e.Buffer.GetArray(ref bufferOptional), e.Buffer.Count);
            };
            streamSource.Stopped += (s, e) =>
            {
                if (e.Cause == StreamAudioSourceStoppedCause.Stopped)
                {
                    // signal end-of-stream to Azure
                    azureInput.Close();
                }
            };

            this.StreamAudioSource = streamSource;
            return AudioConfig.FromStreamInput(azureInput);
        }
        catch (Exception ex)
        {
            Logger.LogError(ex, $"Error while creating an Azure AudioConfig from an IStreamAudioSource. Format: SampleRate={streamSource.Format.SampleRate}, BitsPerSample={streamSource.Format.BitsPerSample}, Channels={streamSource.Format.ChannelCount}");
            streamSource.Dispose();
        }
    }

    this.StreamAudioSource = null;
    this.StreamAudioNoiseGate = null;

    // try and use the built-in audio engine
    if (Config.AudioSource is AudioDevice audioDevice)
    {
        if (audioDevice.UseDefaultAudioInputDevice)
        {
            return AudioConfig.FromDefaultMicrophoneInput();
        }
    }

    return null;
}
void Start()
{
    if (outputText == null)
    {
        UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
    }
    else if (recoButton == null)
    {
        message = "recoButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
    }
    else
    {
        // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
        // Request to use the microphone, cf.
        // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
        message = "Waiting for mic permission";
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#elif PLATFORM_IOS
        if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            Application.RequestUserAuthorization(UserAuthorization.Microphone);
        }
#else
        micPermissionGranted = true;
        message = "Click button to recognize speech";
#endif
        grabacionCompleta = new StringBuilder(200);

        config = SpeechConfig.FromSubscription("b899f4a3bc2b4b30b3e690476b1af952", "westus");
        config.SpeechRecognitionLanguage = "es-ES";

        pushStream = AudioInputStream.CreatePushStream();
        audioInput = AudioConfig.FromStreamInput(pushStream);
        recognizer = new SpeechRecognizer(config, audioInput);
        recognizer.Recognizing += RecognizingHandler;
        recognizer.Recognized += RecognizedHandler;
        recognizer.Canceled += CanceledHandler;

        recoButton.onClick.AddListener(ButtonClick);

        foreach (var device in Microphone.devices)
        {
            Debug.Log("DeviceName: " + device);
        }

        audioSource = GameObject.Find("MyAudioSource").GetComponent<AudioSource>();
    }
}
// Allows OverlordBot to listen for a specific word to start listening. Currently not used, although the setup has all been done.
// This is due to weird state transition errors that I cannot be bothered to debug. A possible benefit is fewer calls to the Speech
// endpoint, but I am not sure whether that is worth investigating further.
//private readonly KeywordRecognitionModel _wakeWord;

public SpeechRecognitionListener(BufferedWaveProvider bufferedWaveProvider, ConcurrentQueue<byte[]> responseQueue, RadioInformation radioInfo)
{
    radioInfo.TransmissionQueue = responseQueue;
    _botType = radioInfo.botType;
    _frequency = radioInfo.freq;
    _callsign = radioInfo.callsign;
    _logClientId = radioInfo.name;

    switch (radioInfo.botType)
    {
        case "ATC":
            Controller = new AtcController
            {
                Callsign = radioInfo.callsign,
                Voice = radioInfo.voice,
                Radio = radioInfo
            };
            break;
        case "AWACS":
            Controller = new AwacsController
            {
                Callsign = radioInfo.callsign,
                Voice = radioInfo.voice,
                Radio = radioInfo
            };
            break;
        default:
            Controller = new MuteController
            {
                Callsign = radioInfo.callsign,
                Voice = null,
                Radio = null
            };
            break;
    }

    var encoder = OpusEncoder.Create(AudioManager.InputSampleRate, 1, Application.Voip);
    encoder.ForwardErrorCorrection = false;
    encoder.FrameByteCount(AudioManager.SegmentFrames);

    var streamReader = new BufferedWaveProviderStreamReader(bufferedWaveProvider);
    _audioConfig = AudioConfig.FromStreamInput(streamReader, AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

    //_wakeWord = KeywordRecognitionModel.FromFile($"Overlord/WakeWords/{callsign}.table");
}
void ConfigureSpeechRecognizer()
{
    _speechConfig = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
    _speechConfig.SpeechRecognitionLanguage = "es-US";
    _speechConfig.OutputFormat = OutputFormat.Detailed;

    _pushStream = AudioInputStream.CreatePushStream();
    _audioInput = AudioConfig.FromStreamInput(_pushStream);
    _speechRecognizer = new SpeechRecognizer(_speechConfig, _audioInput);
    _speechRecognizer.Recognizing += SpeechRecognizingHandler;
    _speechRecognizer.Recognized += SpeechRecognizedHandler;
    _speechRecognizer.Canceled += SpeechCanceledHandler;

    _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
    _audioSource.loop = false;
    _audioSource.playOnAwake = false;
}
void Start()
{
    if (outputText == null)
    {
        UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
    }
    else if (recoButton == null)
    {
        _message = "recoButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(_message);
    }
    else
    {
        // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
        // Request to use the microphone, cf.
        // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
        _message = "Waiting for mic permission";
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#elif PLATFORM_IOS
        if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            Application.RequestUserAuthorization(UserAuthorization.Microphone);
        }
#else
        _micPermissionGranted = true;
        _message = "Click button to recognize speech";
#endif
        _config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
        _config.SpeechRecognitionLanguage = "es-US";
        _config.AddTargetLanguage("en-US");

        _pushStream = AudioInputStream.CreatePushStream();
        _audioInput = AudioConfig.FromStreamInput(_pushStream);
        _recognizer = new TranslationRecognizer(_config, _audioInput);
        _recognizer.Recognizing += RecognizingHandler;
        _recognizer.Recognized += RecognizedHandler;
        _recognizer.Canceled += CanceledHandler;

        foreach (var device in Microphone.devices)
        {
            Debug.Log("DeviceName: " + device);
        }

        _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
    }
}
/// <summary>
/// Speech to text
/// </summary>
/// <param name="audioStream">Audio stream</param>
/// <param name="language">Audio language</param>
/// <returns>Recognition result</returns>
public async Task<string> SpeechToText(Stream audioStream, string language)
{
    var result = new StringBuilder();

    using (var audioConfig = AudioConfig.FromStreamInput(new ReadPCMStream(audioStream), _audioStreamFormat))
    {
        // Subscription configuration
        var config = SpeechConfig.FromSubscription(this._azureConfigs.SpeechToText.ApiKey, this._azureConfigs.SpeechToText.Region);
        // Language configuration
        config.SpeechRecognitionLanguage = language;

        // Used here as the stop signal
        var stopRecognition = new TaskCompletionSource<int>();

        // Create the recognizer
        using (var recognizer = new SpeechRecognizer(config, audioConfig))
        {
            // Subscribe to recognition events
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    result.AppendLine(e.Result.Text);
                }
            };
            recognizer.Canceled += (s, e) =>
            {
                if (e.Reason == CancellationReason.Error)
                {
                    result.AppendLine($"识别取消: 错误码={e.ErrorCode}");
                    result.AppendLine($"识别取消: 错误详情={e.ErrorDetails}");
                    result.AppendLine($"识别取消: 请检查你的Azure订阅是否更新");
                }
                stopRecognition.TrySetResult(0);
            };

            // Start continuous recognition. Use StopContinuousRecognitionAsync() to stop it.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Wait for completion.
            Task.WaitAny(new[] { stopRecognition.Task });

            // Stop recognition
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }

    return result.ToString();
}
public static AudioConfig OpenWavFile(BinaryReader reader)
{
    // Tag "RIFF"
    char[] data = new char[4];
    reader.Read(data, 0, 4);
    Trace.Assert((data[0] == 'R') && (data[1] == 'I') && (data[2] == 'F') && (data[3] == 'F'), "Wrong wav header");

    // Chunk size
    long fileSize = reader.ReadInt32();

    // Subchunk, Wave Header
    // Subchunk, Format
    // Tag: "WAVE"
    reader.Read(data, 0, 4);
    Trace.Assert((data[0] == 'W') && (data[1] == 'A') && (data[2] == 'V') && (data[3] == 'E'), "Wrong wav tag in wav header");

    // Tag: "fmt"
    reader.Read(data, 0, 4);
    Trace.Assert((data[0] == 'f') && (data[1] == 'm') && (data[2] == 't') && (data[3] == ' '), "Wrong format tag in wav header");

    // chunk format size
    var formatSize = reader.ReadInt32();
    var formatTag = reader.ReadUInt16();
    var channels = reader.ReadUInt16();
    var samplesPerSecond = reader.ReadUInt32();
    var avgBytesPerSec = reader.ReadUInt32();
    var blockAlign = reader.ReadUInt16();
    var bitsPerSample = reader.ReadUInt16();

    // Until now we have read 16 bytes of the format; the rest is cbSize and is ignored for now.
    if (formatSize > 16)
    {
        reader.ReadBytes((int)(formatSize - 16));
    }

    // Second chunk, data
    // Tag: "data"
    reader.Read(data, 0, 4);
    Trace.Assert((data[0] == 'd') && (data[1] == 'a') && (data[2] == 't') && (data[3] == 'a'), "Wrong data tag in wav");

    // data chunk size
    int dataSize = reader.ReadInt32();

    // Now we have the format in the format parameter and the reader is set to the
    // start of the body, i.e., the raw sample data.
    AudioStreamFormat format = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, (byte)bitsPerSample, (byte)channels);
    return AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format);
}
public async Task Start()
{
    var config = SpeechConfig.FromSubscription(_projectSettings.AzureSpeechServiceSubscriptionKey, _projectSettings.AzureSpeechServiceRegionName);
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);

    _inputStream = AudioInputStream.CreatePushStream(audioFormat);
    _audioInput = AudioConfig.FromStreamInput(_inputStream);

    _recognizer = new SpeechRecognizer(config, _audioInput);
    _recognizer.SessionStarted += RecognizerStarted;
    _recognizer.Recognized += RecognizerRecognized;
    _recognizer.Canceled += RecognizerCancelled;

    await _recognizer.StartContinuousRecognitionAsync();
}
public async UniTask STTBytes(byte[] readBytes, int sampleRate, int bitRate, int channels)
{
    var speechConfig = SpeechConfig.FromSubscription(subscription_key, region);
    speechConfig.SpeechRecognitionLanguage = location;

    var audioStreamFormat = AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitRate, (byte)channels);
    var audioInputStream = AudioInputStream.CreatePushStream(audioStreamFormat);
    var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

    audioInputStream.Write(readBytes, readBytes.Length);

    var result = await recognizer.RecognizeOnceAsync();
    Debug.Log($"Recognized Line : = {result.Text}");
}
public static AudioConfig DownloadWavFile(BinaryReader reader)
{
    // Tag "RIFF"
    char[] data = new char[4];
    reader.Read(data, 0, 4);

    // Chunk size
    long fileSize = reader.ReadInt32();

    // Subchunk, Wave Header
    // Subchunk, Format
    // Tag: "WAVE"
    reader.Read(data, 0, 4);

    // Tag: "fmt"
    reader.Read(data, 0, 4);

    // chunk format size
    var formatSize = reader.ReadInt32();
    var formatTag = reader.ReadUInt16();
    var channels = reader.ReadUInt16();
    var samplesPerSecond = reader.ReadUInt32();
    var avgBytesPerSec = reader.ReadUInt32();
    var blockAlign = reader.ReadUInt16();
    var bitsPerSample = reader.ReadUInt16();

    // Until now we have read 16 bytes of the format; the rest is cbSize and is ignored for now.
    if (formatSize > 16)
    {
        reader.ReadBytes(formatSize - 16);
    }

    // Second chunk, data
    // Tag: "data"
    reader.Read(data, 0, 4);

    // data chunk size
    int dataSize = reader.ReadInt32();

    // Now we have the format in the format parameter and the reader is set to the
    // start of the body, i.e., the raw sample data.
    AudioStreamFormat format = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, (byte)bitsPerSample, (byte)channels);
    return AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format);
}
public async Task<ActionResult<IEnumerable<SpeechAnalisys>>> RecognizeSpeechAsync(IFormFile file)
{
    if (file == null || file.Length == 0)
    {
        return BadRequest("Um arquivo de audio é necessário!");
    }

    if (file.ContentType != "audio/wav")
    {
        return BadRequest("Permitido somente formato wav!");
    }

    var config = SpeechConfig.FromSubscription(SUBSCRIPTION_KEY, SUBSCRIPTION_REGION);
    var result = new List<SpeechAnalisys>();
    var stopRecognition = new TaskCompletionSource<int>();

    using (var audioInput = AudioConfig.FromStreamInput(new PullAudioInputStream(new BinaryAudioStreamReader(new BinaryReader(file.OpenReadStream())))))
    {
        using (var recognizer = new SpeechRecognizer(config, audioInput))
        {
            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    result.Add(new SpeechAnalisys { Sentence = e.Result.Text });
                }
            };
            recognizer.Canceled += (s, e) => stopRecognition.TrySetResult(0);
            recognizer.SessionStopped += (s, e) => stopRecognition.TrySetResult(0);

            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
            Task.WaitAny(new[] { stopRecognition.Task });
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }

    return Ok(result);
}
private void Init(string from, string to)
{
    this.toLanguage = to;

    Profile = MediaEncodingProfile.CreateWav(AudioEncodingQuality.Low);
    Profile.Audio = AudioEncodingProperties.CreatePcm(16000, 1, 16);

    byte channels = 1;
    byte bitsPerSample = 16;
    uint samplesPerSecond = 16000; // or 8000
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);

    // Init Push Stream
    pushStream = AudioInputStream.CreatePushStream(audioFormat);

    if (from == to)
    {
        var config = SpeechConfig.FromSubscription(apiKey, region);
        config.SpeechRecognitionLanguage = from;

        speechRecognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(pushStream));
        speechRecognizer.Recognizing += RecognisingSpeechHandler;
        speechRecognizer.Recognized += RecognisingSpeechHandler;
        speechRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
        speechRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();
    }
    else
    {
        var config = SpeechTranslationConfig.FromSubscription(apiKey, region);
        config.SpeechRecognitionLanguage = from;
        config.AddTargetLanguage(to);

        translationRecognizer = new TranslationRecognizer(config, AudioConfig.FromStreamInput(pushStream));
        translationRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
        translationRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();
        translationRecognizer.Recognizing += RecognisingTranslationHandler;
        translationRecognizer.Recognized += RecognisingTranslationHandler;
    }
}
async static Task FromStream(SpeechConfig speechConfig)
{
    var reader = new BinaryReader(File.OpenRead(DEMO_FILE));
    Console.WriteLine(reader.ToString());

    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);

    byte[] readBytes;
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);

    var result = await recognizer.RecognizeOnceAsync();
    Console.WriteLine($"RECOGNIZED: Text={result.Text}");
}