private static async Task ConvertAudioToTextAsync(Stream audioBlob, Stream textBlob, SpeechConfig config)
        {
            var completionSource = new TaskCompletionSource <int>();

            using (var audioInput = AudioConfig.FromStreamInput(new AudioStreamReader(audioBlob)))
            {
                using (var recognizer = new SpeechRecognizer(config, audioInput))
                {
                    var streamWriter = new StreamWriter(textBlob);

                    recognizer.Recognized += (s, e) => streamWriter.Write(e.Result.Text);

                    recognizer.SessionStopped += (s, e) =>
                    {
                        streamWriter.Flush();
                        streamWriter.Dispose();
                        completionSource.TrySetResult(0);
                    };

                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    await Task.WhenAny(new[] { completionSource.Task });

                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                }
            }
        }
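
The AudioStreamReader used above is not an SDK type; it is typically a small PullAudioInputStreamCallback that wraps a System.IO.Stream. A minimal sketch is shown below (the class shape is inferred from the call site; the BinaryAudioStreamReader used in later examples follows the same pattern over a BinaryReader):

        // Hypothetical helper (requires Microsoft.CognitiveServices.Speech.Audio and System.IO):
        // adapts a System.IO.Stream to the Speech SDK's pull model.
        public sealed class AudioStreamReader : PullAudioInputStreamCallback
        {
            private readonly Stream _source;

            public AudioStreamReader(Stream source)
            {
                _source = source;
            }

            // Called by the SDK whenever it needs more audio; returning 0 signals end of stream.
            public override int Read(byte[] dataBuffer, uint size)
            {
                return _source.Read(dataBuffer, 0, (int)size);
            }

            public override void Close()
            {
                _source.Dispose();
                base.Close();
            }
        }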
Example #2
        private static async Task <string> UploadAudioAndStartRemoteTranscription(string key, string region)
        {
            AudioStreamFormat audioStreamFormat;

            var config = SpeechConfig.FromSubscription(key, region);

            config.SetProperty("ConversationTranscriptionInRoomAndOnline", "true");
            config.SetServiceProperty("transcriptionMode", "RealTimeAndAsync", ServicePropertyChannel.UriQueryParameter);
            var waveFilePullStream = OpenWavFile(@"katiesteve.wav", out audioStreamFormat);
            var audioInput         = AudioConfig.FromStreamInput(AudioInputStream.CreatePullStream(waveFilePullStream, audioStreamFormat));

            var meetingId = Guid.NewGuid().ToString();

            using (var conversation = await Conversation.CreateConversationAsync(config, meetingId))
            {
                using (var conversationTranscriber = TrackSessionId(new ConversationTranscriber(audioInput)))
                {
                    await conversationTranscriber.JoinConversationAsync(conversation);

                    await conversation.AddParticipantAsync("OneUserByUserId");

                    var user = User.FromUserId("CreateUserFromId and then add it");
                    await conversation.AddParticipantAsync(user);

                    var result = await GetRecognizerResult(conversationTranscriber, meetingId);
                }
            }
            return(meetingId);
        }
        private void SetupTranscriptionAndTranslationService()
        {
            try
            {
                var lCognitiveKey    = _settings.AzureCognitiveKey;
                var lCognitiveRegion = _settings.AzureCognitiveRegion;

                _eventPublisher.Publish("MySTT Setup", $"Got region: {lCognitiveRegion}, key starting with: {lCognitiveKey?.Substring(0, lCognitiveKey.Length / 2)}");

                this.mTransSpeechConfig = SpeechTranslationConfig.FromSubscription(lCognitiveKey, lCognitiveRegion);
                var fromLanguage = "en-US";
                var toLanguages  = new List <string> {
                    "el-GR"
                };
                //var toLanguages = new List<string> { "ru-RU" };
                this.mTransSpeechConfig.SpeechRecognitionLanguage = fromLanguage;
                toLanguages.ForEach(this.mTransSpeechConfig.AddTargetLanguage);
                this.mInputStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(SAMPLESPERSECOND, BITSPERSAMPLE, NUMBEROFCHANNELS));

                this.mAudioConfig           = AudioConfig.FromStreamInput(this.mInputStream);
                this.mTranslationRecognizer = new TranslationRecognizer(this.mTransSpeechConfig, this.mAudioConfig);

                this.mTranslationRecognizer.Recognizing       += this.MSpeechRecognizer_Recognizing;
                this.mTranslationRecognizer.Recognized        += this.MSpeechRecognizer_Recognized;
                this.mTranslationRecognizer.SpeechEndDetected += this.MSpeechRecognizer_SpeechEndDetected;

                this.StartRecognisionIfNeeded();
            }
            catch (Exception ex)
            {
                _eventPublisher.Publish("MySTT Setup - Failed", $"Failed to initialize: {ex.Message}");
            }
        }
Example #4
    public static AudioConfig OpenWavFile(Stream stream)
    {
        BinaryReader      reader = new BinaryReader(stream);
        AudioStreamFormat format = readWaveHeader(reader);

        return(AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format));
    }
Example #5
        public async void AudioStart()
        {
            var audioStream  = new VoiceAudioStream();
            var audioFormat  = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            var audioConfig  = AudioConfig.FromStreamInput(audioStream, audioFormat);
            var speechConfig = SpeechConfig.FromSubscription(_config["SpeechApiKey"], _config["SpeechRegion"]);
            var speechClient = new SpeechRecognizer(speechConfig, audioConfig);
            var phraseList   = PhraseListGrammar.FromRecognizer(speechClient);

            foreach (var phrase in phrases)
            {
                phraseList.AddPhrase(phrase);
            }

            speechClient.Recognized += _speechClient_Recognized;

            string sessionId = speechClient.Properties.GetProperty(PropertyId.Speech_SessionId);

            var conn = new ConnectionInfo()
            {
                SessionId    = sessionId,
                AudioStream  = audioStream,
                SpeechClient = speechClient,
            };

            _connections.Add(Context.ConnectionId, conn);

            await speechClient.StartContinuousRecognitionAsync();

            Debug.WriteLine("Audio start message.");
        }
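
VoiceAudioStream, which several of these examples share, is likewise a user-defined PullAudioInputStreamCallback: the hub writes incoming audio chunks into it while the recognizer pulls bytes out. A rough sketch built on a blocking producer/consumer buffer (the class and member names are assumptions taken from the call sites):

        // Hypothetical live-audio callback (requires System.Collections.Concurrent and System.Threading).
        public class VoiceAudioStream : PullAudioInputStreamCallback
        {
            private readonly BlockingCollection<byte[]> _chunks = new BlockingCollection<byte[]>();
            private byte[] _current = Array.Empty<byte>();
            private int _offset;

            // Called by the producer (e.g. a SignalR hub method) when a new audio packet arrives.
            public void Write(byte[] buffer, int count)
            {
                var copy = new byte[count];
                Buffer.BlockCopy(buffer, 0, copy, 0, count);
                _chunks.Add(copy);
            }

            // Called by the SDK; blocks until audio is available, returns 0 once the stream is completed.
            public override int Read(byte[] dataBuffer, uint size)
            {
                if (_offset >= _current.Length)
                {
                    if (!_chunks.TryTake(out var next, Timeout.Infinite))
                    {
                        return 0;
                    }
                    _current = next;
                    _offset  = 0;
                }

                int count = Math.Min((int)size, _current.Length - _offset);
                Buffer.BlockCopy(_current, _offset, dataBuffer, 0, count);
                _offset += count;
                return count;
            }

            public override void Close()
            {
                _chunks.CompleteAdding();
                base.Close();
            }
        }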
        public async Task <string> DetectLanguage(byte[] audioBytes, string fileExtension, string locale1, string locale2)
        {
            var wavBytes = ConvertToWaveBytes(audioBytes, fileExtension);

            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { locale1, locale2 });

            var config          = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
            var stopRecognition = new TaskCompletionSource <int>();
            var detected        = new List <string>();

            using var pushStream = AudioInputStream.CreatePushStream();
            using (var audioInput = AudioConfig.FromStreamInput(pushStream))
            {
                using var recognizer = new SpeechRecognizer(
                          config,
                          autoDetectSourceLanguageConfig,
                          audioInput);
                pushStream.Write(wavBytes);
                pushStream.Close();

                recognizer.Recognized += (s, e) =>
                {
                    var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                    var detectedLanguage = autoDetectSourceLanguageResult.Language;
                    detected.Add(detectedLanguage);
                    if (detected.Count > UtteranceCount)
                    {
                        stopRecognition.TrySetResult(0);
                    }
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    stopRecognition.TrySetResult(0);
                };

                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                _ = Task.Factory.StartNew(async () => { await SetTimeOutForRecognition(stopRecognition).ConfigureAwait(false); }, CancellationToken.None, TaskCreationOptions.None, TaskScheduler.Default);

                Task.WaitAny(new[] { stopRecognition.Task });

                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }

            if (detected.Count == 0)
            {
                throw new TimeoutException("Did not get any language identification results back in time.");
            }

            var detectedByCount = detected.GroupBy(i => i);
            var mostFreq        = detectedByCount.OrderBy(t => t.Count()).LastOrDefault().Key;

            if (string.IsNullOrEmpty(mostFreq) || (!mostFreq.Equals(locale1, StringComparison.OrdinalIgnoreCase) && !mostFreq.Equals(locale2, StringComparison.OrdinalIgnoreCase)))
            {
                return(locale1);
            }

            return(mostFreq);
        }
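
SetTimeOutForRecognition is not shown above; it only needs to complete stopRecognition after a grace period so the method cannot wait indefinitely. A possible sketch (the 30-second timeout is an assumption):

        private static async Task SetTimeOutForRecognition(TaskCompletionSource <int> stopRecognition)
        {
            // Bound the time we are willing to wait for language-identification results.
            await Task.Delay(TimeSpan.FromSeconds(30)).ConfigureAwait(false);
            stopRecognition.TrySetResult(0);
        }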
        /// <summary>
        /// Starts this instance.
        /// </summary>
        private async Task _start()
        {
            await this._syncLock.WaitAsync().ConfigureAwait(false);

            if (!_isRunning)
            {
                _tokenSource = new CancellationTokenSource();
                _buffer      = new BufferBlock <SerializableAudioMediaBuffer>(new DataflowBlockOptions {
                    CancellationToken = this._tokenSource.Token
                });
                await Task.Factory.StartNew(this._process).ConfigureAwait(false);

                // Initialize speech recognizer.
                Debug.WriteLine("RecordingBot _start.");
                _audioStream = new VoiceAudioStream();
                var audioFormat  = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
                var audioConfig  = AudioConfig.FromStreamInput(_audioStream, audioFormat);
                var speechConfig = SpeechConfig.FromSubscription("03f0f0daa33448ba9f9bf799d2e14d2a", "westus2");

                _speechClient              = new SpeechRecognizer(speechConfig, audioConfig);
                _speechClient.Recognized  += _speechClient_Recognized;
                _speechClient.Recognizing += _speechClient_Recognizing;
                _speechClient.Canceled    += _speechClient_Canceled;
                await _speechClient.StartContinuousRecognitionAsync();

                _isRunning = true;
            }
            this._syncLock.Release();
        }
Example #8
        //private const string speechEndpoint = "https://YOUR_LOCATION.api.cognitive.microsoft.com/";

        //public async Task<IActionResult> OnGetAsync()
        //{
        //    return Page();
        //}

        public async Task <IActionResult> OnPostAsync()
        {
            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechLocation);

            speechConfig.SpeechRecognitionLanguage = "ja-JP";

            byte[] readBytes;

            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var reader           = new BinaryReader(VoiceFile.OpenReadStream());
            do
            {
                readBytes = reader.ReadBytes(1024);
                audioInputStream.Write(readBytes, readBytes.Length);
            } while (readBytes.Length > 0);

            var audioConfig = AudioConfig.FromStreamInput(audioInputStream);

            using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            var result = await speechRecognizer.RecognizeOnceAsync();

            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                Result         = "結果:"; // "Result:"
                RecognizedText = result.Text;
            }

            return(Page());
        }
Example #9
        /// <summary>
        /// Runs speech-to-text on an Opus audio file streamed from a blob container in Azure Storage.
        /// </summary>
        /// <param name="opusBlob">Name of the Opus file</param>
        /// <param name="container">Azure blob container name</param>
        /// <returns>List of <see cref="Speech"/> results for the recognized audio</returns>
        public async Task <List <Speech> > RunRecognitionAsync(string opusBlob, string container)
        {
            SpeechResult = new List <Speech>();

            var blobService = new BlobService();
            var blobClient  = await blobService.GetBlobFromContainerAsync(opusBlob, container);

            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var audioConfig      = AudioConfig.FromStreamInput(audioInputStream);
            using (var recognizer = new SpeechRecognizer(_speechConfig, _languagesToDetect, audioConfig))
            {
                recognizer.Recognizing    += Recognizing;
                recognizer.Recognized     += Recognized;
                recognizer.SessionStarted += SessionStarted;
                recognizer.SessionStopped += SessionStopped;
                recognizer.Canceled       += SessionCanceled;

                await InjectStreamIntoRecognizerAsync(audioInputStream, blobClient);

                await recognizer.StartContinuousRecognitionAsync();

                Task.WaitAny(new[] { _stopRecognition.Task });
                await recognizer.StopContinuousRecognitionAsync();
            }

            return(SpeechResult);
        }
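
InjectStreamIntoRecognizerAsync is the piece that copies the blob into the push stream. A rough sketch, assuming blobClient is an Azure.Storage.Blobs.BlobClient and that any Opus decoding happens before the bytes are written:

        // Hypothetical helper: copies the blob's contents into the recognizer's push stream.
        private static async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobClient)
        {
            using var blobStream = await blobClient.OpenReadAsync();

            var buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = await blobStream.ReadAsync(buffer, 0, buffer.Length)) > 0)
            {
                audioInputStream.Write(buffer, bytesRead);
            }

            // Closing the push stream tells the recognizer that no more audio will arrive.
            audioInputStream.Close();
        }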
        /// <summary>
        /// Remote audio transcription of the given audioFile with Cognitive Services.
        /// </summary>
        public static AnalysisResult TranscribeAudio(ref AnalysisResult audioResponse, IFormFile audioFile)
        {
            // needed for speaker diarization to resolve at the word level
            SPEECH_CONFIG.RequestWordLevelTimestamps();

            var audioFormat128 = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);
            var audioFormat256 = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);

            // load bytestream -> audio stream
            // load audio config from audio stream
            // initialize speech recognizer
            using (var br = new BinaryReader(audioFile.OpenReadStream()))
                using (var audioInputStream = AudioInputStream.CreatePushStream(audioFormat128))
                    using (var audioConfig = AudioConfig.FromStreamInput(audioInputStream))
                        using (var recognizer = new SpeechRecognizer(SPEECH_CONFIG, audioConfig))
                        {
                            long nbytes = audioFile.Length;
                            var  buff   = new List <byte>();

                            // read through bytes of audio
                            byte[] readBytes;
                            do
                            {
                                readBytes = br.ReadBytes(1024);
                                buff.AddRange(readBytes);
                                audioInputStream.Write(readBytes, readBytes.Length);
                            } while (readBytes.Length > 0);

                            var transcript = ExecuteRecognizer(recognizer).Result;
                            audioResponse.Transcript = transcript;
                            return(audioResponse);
                        }
        }
Example #11
        public static async Task <string> RecognizeSpeechFromUrlAsync(string url, string locale)
        {
            byte[] audioData = null;
            using (var wc = new System.Net.WebClient())
            {
                audioData = wc.DownloadData(url);
            }
            var stream = new MemoryStream(audioData);

            var speechApiKey    = Environment.GetEnvironmentVariable("SpeechApiKey");
            var speechApiRegion = Environment.GetEnvironmentVariable("SpeechApiRegion");

            var speechConfig = SpeechConfig.FromSubscription(speechApiKey, speechApiRegion);

            speechConfig.SpeechRecognitionLanguage = locale;

            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            var audioStream = new VoiceAudioStream(stream);
            var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);

            var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
            var result     = await recognizer.RecognizeOnceAsync();

            return(result.Text);
        }
 /// <summary>
 /// Recognize speech from an audio stream.
 ///
 /// Currently only specific audio formats are supported; for details see:
 /// https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/how-to-use-audio-input-streams
 /// </summary>
 /// <param name="stream"></param>
 /// <returns></returns>
 public static async Task FormStream(Stream stream)
 {
     using (var audioConfig = AudioConfig.FromStreamInput(new ReadPCMStream(stream), AudioStreamFormat)) // read from the file
     {
         await SpeechRecognizer(audioConfig);
     }
 }
        /// <summary>
        /// This method opens a WAV file.
        /// </summary>
        /// <param name="filename">The WAV file to read the audio data from.</param>
        public static AudioConfig OpenWavFile(string filename)
        {
            AudioStreamFormat format = null;
            var callback             = OpenWavFileStream(filename, out format);

            return(AudioConfig.FromStreamInput(callback, format));
        }
Example #14
        public static AudioConfig OpenWavFile(BinaryReader reader, AudioProcessingOptions audioProcessingOptions = null)
        {
            AudioStreamFormat format = readWaveHeader(reader);

            return((audioProcessingOptions == null)
                    ? AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format)
                    : AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format, audioProcessingOptions));
        }
Example #15
        public async void RegisterAttendeeAsync(string name, string myLanguage, string preferredLanguage)
        {
            Debug.WriteLine($"User {name}, Language: {myLanguage}, Connection {Context.ConnectionId} starting audio.");
            var config = _config.GetSection("SpeechAPI").Get <AppSettings>();

            bool exists = await InitializeAttendeeInfo(name, myLanguage, preferredLanguage);

            var audioStream = new VoiceAudioStream();
            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);
            var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);

            var speechKey    = config.SubscriptionKey;
            var speechRegion = config.Region;
            var url          = config.EndpointUri;

            Debug.WriteLine($"Key:{speechKey} | Region:{speechRegion}");

            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);

            speechConfig.SpeechRecognitionLanguage = preferredLanguage;
            speechConfig.OutputFormat = OutputFormat.Simple;

            var speechClient = new SpeechRecognizer(speechConfig, audioConfig);

            speechClient.Recognized     += _speechClient_Recognized;
            speechClient.Recognizing    += _speechClient_Recognizing;
            speechClient.Canceled       += _speechClient_Canceled;
            speechClient.SessionStarted += _speechClient_SessionStarted;
            string sessionId = speechClient.Properties.GetProperty(PropertyId.Speech_SessionId);

            //Maintains only one API connection per language
            SpeechAPIConnection conn = null;

            if (_connections.ContainsKey(preferredLanguage))
            {
                conn           = _connections[preferredLanguage];
                conn.SessionId = sessionId;
            }
            else
            {
                conn = new SpeechAPIConnection()
                {
                    SessionId   = sessionId,
                    AudioStream = audioStream,
                    Recognizer  = speechClient,
                    Language    = preferredLanguage
                };
                _connections[preferredLanguage] = conn;
            }

            Debug.WriteLine($"Connection for {preferredLanguage} added | SessionId:{sessionId}");

            await SendToAttendeeAsync(_attendeeInfo.GetAttendeeByConnectionID(Context.ConnectionId), $"Welcome:{name}");

            await speechClient.StartContinuousRecognitionAsync();

            Debug.WriteLine("Audio start message.");
        }
Example #16
        /// <summary>
        /// Initializes a new instance of the <see cref="ContinuousSpeechRecognizer"/> class.
        /// </summary>
        /// <param name="pipeline">The pipeline in which to create the component.</param>
        /// <param name="subscriptionKey">The subscription key for the Azure speech resource.</param>
        /// <param name="region">The service region of the Azure speech resource.</param>
        public ContinuousSpeechRecognizer(Pipeline pipeline, string subscriptionKey, string region)
            : base(pipeline)
        {
            var config = SpeechConfig.FromSubscription(subscriptionKey, region);

            this.pushStream = AudioInputStream.CreatePushStream();
            this.audioInput = AudioConfig.FromStreamInput(this.pushStream);
            this.recognizer = new SpeechRecognizer(config, this.audioInput);
        }
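
The constructor above only wires the recognizer to a push stream; elsewhere the component still has to forward incoming audio into pushStream and eventually stop recognition. A minimal, hypothetical sketch of those two pieces (the method names are illustrative, not part of the \psi component API):

        // Hypothetical: called for each incoming audio buffer from the pipeline.
        private void OnAudioReceived(byte[] audio, int length)
        {
            this.pushStream.Write(audio, length);
        }

        // Hypothetical shutdown path: signal end of audio, then stop the recognizer.
        private async Task StopRecognitionAsync()
        {
            this.pushStream.Close();
            await this.recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }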
Example #17
 public TranslationEngine(IConfiguration config, IHubContext <TranslationHub> hub)
 {
     _hub               = hub;
     _config            = config;
     _translationConfig = SpeechTranslationConfig.FromSubscription(_config["SUBSCRIPTION_KEY"], _config["REGION"]);
     _speechConfig      = SpeechTranslationConfig.FromSubscription(_config["SUBSCRIPTION_KEY"], _config["REGION"]);
     _audioInput        = AudioConfig.FromStreamInput(_inputStream);
     _audioOutputStream = AudioOutputStream.CreatePullStream();
     _output            = AudioConfig.FromStreamOutput(_audioOutputStream);
 }
        /// <summary>
        /// Constructs an <see cref="AudioConfig"/> from <see cref="Config"/>.
        /// Depending on the available services, this may either use the audio features built into the Speech SDK (such as <see cref="AudioConfig.FromDefaultMicrophoneInput"/>),
        /// or it may construct an <see cref="IStreamAudioSource"/> that accesses the requested <see cref="AudioDevice"/> with resampling and noise gates as required.
        /// </summary>
        /// <returns>The constructed <see cref="AudioConfig"/>, or null if no suitable audio input is available.</returns>
        protected AudioConfig GetAudioConfig()
        {
            var streamSource = GetStreamAudioSource(Config.AudioSource);

            if (streamSource != null)
            {
                //use this stream source and convert to an Azure audio stream
                try
                {
                    var azureInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(
                                                                           (uint)streamSource.Format.SampleRate,
                                                                           (byte)streamSource.Format.BitsPerSample,
                                                                           (byte)streamSource.Format.ChannelCount));

                    byte[] bufferOptional = null;
                    streamSource.DataAvailable += (s, e) =>
                    {
                        azureInput.Write(e.Buffer.GetArray(ref bufferOptional), e.Buffer.Count);
                    };
                    streamSource.Stopped += (s, e) =>
                    {
                        if (e.Cause == StreamAudioSourceStoppedCause.Stopped)
                        {
                            //signal end-of-stream to Azure
                            azureInput.Close();
                        }
                    };

                    this.StreamAudioSource = streamSource;
                    return(AudioConfig.FromStreamInput(azureInput));
                }
                catch (Exception ex)
                {
                    Logger.LogError(ex, $"Error while creating an Azure AudioConfig from an IStreamAudioSource. Format: SampleRate={streamSource.Format.SampleRate}, BitsPerSample={streamSource.Format.BitsPerSample}, Channels={streamSource.Format.ChannelCount}");
                    streamSource.Dispose();
                }
            }

            this.StreamAudioSource    = null;
            this.StreamAudioNoiseGate = null;

            //try and use the built-in audio engine
            if (Config.AudioSource is AudioDevice audioDevice)
            {
                if (audioDevice.UseDefaultAudioInputDevice)
                {
                    return(AudioConfig.FromDefaultMicrophoneInput());
                }
            }

            return(null);
        }
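
For context, this is roughly how the result of GetAudioConfig might be consumed; the speech key and region properties on Config are assumptions for the sketch:

        // Hypothetical usage: fail clearly when no audio source could be configured.
        protected SpeechRecognizer CreateRecognizer()
        {
            var audioConfig = GetAudioConfig();
            if (audioConfig == null)
            {
                throw new InvalidOperationException("No usable audio input was found.");
            }

            // Config.SpeechKey and Config.SpeechRegion are assumed settings, not shown above.
            var speechConfig = SpeechConfig.FromSubscription(Config.SpeechKey, Config.SpeechRegion);
            return new SpeechRecognizer(speechConfig, audioConfig);
        }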
Example #19
    void Start()
    {
        if (outputText == null)
        {
            UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (recoButton == null)
        {
            message = "recoButton property is null! Assign a UI Button to it.";
            UnityEngine.Debug.LogError(message);
        }
        else
        {
            // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#elif PLATFORM_IOS
            if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
            {
                Application.RequestUserAuthorization(UserAuthorization.Microphone);
            }
#else
            micPermissionGranted = true;
            message = "Click button to recognize speech";
#endif

            grabacionCompleta = new StringBuilder(200);

            config = SpeechConfig.FromSubscription("b899f4a3bc2b4b30b3e690476b1af952", "westus");
            config.SpeechRecognitionLanguage = "es-ES";
            pushStream              = AudioInputStream.CreatePushStream();
            audioInput              = AudioConfig.FromStreamInput(pushStream);
            recognizer              = new SpeechRecognizer(config, audioInput);
            recognizer.Recognizing += RecognizingHandler;
            recognizer.Recognized  += RecognizedHandler;
            recognizer.Canceled    += CanceledHandler;

            recoButton.onClick.AddListener(ButtonClick);
            foreach (var device in Microphone.devices)
            {
                Debug.Log("DeviceName: " + device);
            }
            audioSource = GameObject.Find("MyAudioSource").GetComponent <AudioSource>();
        }
    }
        // Allows OverlordBot to listen for a specific wake word before it starts listening. Currently not used, although the setup has all been done.
        // This is due to weird state-transition errors that I cannot be bothered to debug. The possible benefit is fewer calls to the Speech endpoint,
        // but I am not sure whether that is worth investigating further.
        //private readonly KeywordRecognitionModel _wakeWord;

        public SpeechRecognitionListener(BufferedWaveProvider bufferedWaveProvider, ConcurrentQueue <byte[]> responseQueue, RadioInformation radioInfo)
        {
            radioInfo.TransmissionQueue = responseQueue;
            _botType   = radioInfo.botType;
            _frequency = radioInfo.freq;
            _callsign  = radioInfo.callsign;

            _logClientId = radioInfo.name;

            switch (radioInfo.botType)
            {
            case "ATC":
                Controller = new AtcController
                {
                    Callsign = radioInfo.callsign,
                    Voice    = radioInfo.voice,
                    Radio    = radioInfo
                };
                break;

            case "AWACS":
                Controller = new AwacsController
                {
                    Callsign = radioInfo.callsign,
                    Voice    = radioInfo.voice,
                    Radio    = radioInfo
                };
                break;

            default:
                Controller = new MuteController
                {
                    Callsign = radioInfo.callsign,
                    Voice    = null,
                    Radio    = null
                };
                break;
            }

            var encoder = OpusEncoder.Create(AudioManager.InputSampleRate, 1, Application.Voip);

            encoder.ForwardErrorCorrection = false;
            encoder.FrameByteCount(AudioManager.SegmentFrames);

            var streamReader = new BufferedWaveProviderStreamReader(bufferedWaveProvider);

            _audioConfig = AudioConfig.FromStreamInput(streamReader, AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1));

            //_wakeWord = KeywordRecognitionModel.FromFile($"Overlord/WakeWords/{callsign}.table");
        }
Example #21
 void ConfigureSpeechRecognizer()
 {
     _speechConfig = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
     _speechConfig.SpeechRecognitionLanguage = "es-US";
     _speechConfig.OutputFormat = OutputFormat.Detailed;
     _pushStream                    = AudioInputStream.CreatePushStream();
     _audioInput                    = AudioConfig.FromStreamInput(_pushStream);
     _speechRecognizer              = new SpeechRecognizer(_speechConfig, _audioInput);
     _speechRecognizer.Recognizing += SpeechRecognizingHandler;
     _speechRecognizer.Recognized  += SpeechRecognizedHandler;
     _speechRecognizer.Canceled    += SpeechCanceledHandler;
     _audioSource                   = GameObject.Find("AudioSource").GetComponent <AudioSource>();
     _audioSource.loop              = false;
     _audioSource.playOnAwake       = false;
 }
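
ConfigureSpeechRecognizer only builds the push-stream pipeline; the microphone audio still has to be captured and written to _pushStream. A common Unity approach is to convert the recorded float samples to 16-bit PCM and push them, roughly as below (fields such as _audioClip and _lastSample are assumptions):

 // Hypothetical: called periodically (e.g. from Update) to push newly recorded microphone samples.
 void PushMicrophoneAudio()
 {
     int pos  = Microphone.GetPosition(null);
     int diff = pos - _lastSample;
     if (diff <= 0)
     {
         return;
     }

     var samples = new float[diff * _audioClip.channels];
     _audioClip.GetData(samples, _lastSample);

     // Convert [-1, 1] floats to 16-bit little-endian PCM, the format the push stream expects.
     var pcm = new byte[samples.Length * 2];
     for (int i = 0; i < samples.Length; i++)
     {
         short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
         pcm[i * 2]     = (byte)(value & 0xFF);
         pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
     }

     _pushStream.Write(pcm);
     _lastSample = pos;
 }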
Example #22
    void Start()
    {
        if (outputText == null)
        {
            UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (recoButton == null)
        {
            _message = "recoButton property is null! Assign a UI Button to it.";
            UnityEngine.Debug.LogError(_message);
        }
        else
        {
            // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            _message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#elif PLATFORM_IOS
            if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
            {
                Application.RequestUserAuthorization(UserAuthorization.Microphone);
            }
#else
            _micPermissionGranted = true;
            _message = "Click button to recognize speech";
#endif
            _config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
            _config.SpeechRecognitionLanguage = "es-US";
            _config.AddTargetLanguage("en-US");
            _pushStream              = AudioInputStream.CreatePushStream();
            _audioInput              = AudioConfig.FromStreamInput(_pushStream);
            _recognizer              = new TranslationRecognizer(_config, _audioInput);
            _recognizer.Recognizing += RecognizingHandler;
            _recognizer.Recognized  += RecognizedHandler;
            _recognizer.Canceled    += CanceledHandler;

            foreach (var device in Microphone.devices)
            {
                Debug.Log("DeviceName: " + device);
            }
            _audioSource = GameObject.Find("AudioSource").GetComponent <AudioSource>();
        }
    }
        /// <summary>
        /// Converts audio to text.
        /// </summary>
        /// <param name="audioStream">The audio stream</param>
        /// <param name="language">The language of the audio</param>
        /// <returns>The recognition result</returns>
        public async Task <string> SpeechToText(Stream audioStream, string language)
        {
            var result = new StringBuilder();

            using (var audioConfig = AudioConfig.FromStreamInput(new ReadPCMStream(audioStream), _audioStreamFormat))
            {
                // Subscription configuration
                var config = SpeechConfig.FromSubscription(this._azureConfigs.SpeechToText.ApiKey, this._azureConfigs.SpeechToText.Region);
                // Language configuration
                config.SpeechRecognitionLanguage = language;
                // Used to signal when recognition should stop
                var stopRecognition = new TaskCompletionSource <int>();
                // Create the recognizer
                using (var recognizer = new SpeechRecognizer(config, audioConfig))
                {
                    // Subscribe to recognition events
                    recognizer.Recognized += (s, e) =>
                    {
                        if (e.Result.Reason == ResultReason.RecognizedSpeech)
                        {
                            result.AppendLine(e.Result.Text);
                        }
                    };
                    recognizer.Canceled += (s, e) =>
                    {
                        if (e.Reason == CancellationReason.Error)
                        {
                            result.AppendLine($"识别取消: 错误码={e.ErrorCode}");
                            result.AppendLine($"识别取消: 错误详情={e.ErrorDetails}");
                            result.AppendLine($"识别取消: 请检查你的Azure订阅是否更新");
                        }

                        stopRecognition.TrySetResult(0);
                    };

                    // Start continuous recognition. Use StopContinuousRecognitionAsync() to stop it.
                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    // Wait for completion.
                    Task.WaitAny(new[] { stopRecognition.Task });

                    // Stop recognition.
                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                }
            }

            return(result.ToString());
        }
Example #24
        public static AudioConfig OpenWavFile(BinaryReader reader)
        {
            // Tag "RIFF"
            char[] data = new char[4];
            reader.Read(data, 0, 4);
            Trace.Assert((data[0] == 'R') && (data[1] == 'I') && (data[2] == 'F') && (data[3] == 'F'), "Wrong wav header");

            // Chunk size
            long fileSize = reader.ReadInt32();

            // Subchunk, Wave Header
            // Subchunk, Format
            // Tag: "WAVE"
            reader.Read(data, 0, 4);
            Trace.Assert((data[0] == 'W') && (data[1] == 'A') && (data[2] == 'V') && (data[3] == 'E'), "Wrong wav tag in wav header");

            // Tag: "fmt"
            reader.Read(data, 0, 4);
            Trace.Assert((data[0] == 'f') && (data[1] == 'm') && (data[2] == 't') && (data[3] == ' '), "Wrong format tag in wav header");

            // chunk format size
            var formatSize       = reader.ReadInt32();
            var formatTag        = reader.ReadUInt16();
            var channels         = reader.ReadUInt16();
            var samplesPerSecond = reader.ReadUInt32();
            var avgBytesPerSec   = reader.ReadUInt32();
            var blockAlign       = reader.ReadUInt16();
            var bitsPerSample    = reader.ReadUInt16();

            // Until now we have read 16 bytes in format, the rest is cbSize and is ignored for now.
            if (formatSize > 16)
            {
                reader.ReadBytes((int)(formatSize - 16));
            }

            // Second Chunk, data
            // tag: data.
            reader.Read(data, 0, 4);
            Trace.Assert((data[0] == 'd') && (data[1] == 'a') && (data[2] == 't') && (data[3] == 'a'), "Wrong data tag in wav");
            // data chunk size
            int dataSize = reader.ReadInt32();

            // now, we have the format in the format parameter and the
            // reader set to the start of the body, i.e., the raw sample data
            AudioStreamFormat format = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, (byte)bitsPerSample, (byte)channels);

            return(AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format));
        }
        public async Task Start()
        {
            var config = SpeechConfig.FromSubscription(_projectSettings.AzureSpeechServiceSubscriptionKey, _projectSettings.AzureSpeechServiceRegionName);

            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);

            _inputStream = AudioInputStream.CreatePushStream(audioFormat);
            _audioInput  = AudioConfig.FromStreamInput(_inputStream);

            _recognizer = new SpeechRecognizer(config, _audioInput);
            _recognizer.SessionStarted += RecognizerStarted;
            _recognizer.Recognized     += RecognizerRecognized;
            _recognizer.Canceled       += RecognizerCancelled;

            await _recognizer.StartContinuousRecognitionAsync();
        }
Example #26
    public async UniTask STTBytes(byte[] readBytes, int sampleRate, int bitRate, int channels)
    {
        var speechConfig = SpeechConfig.FromSubscription(subscription_key, region);

        speechConfig.SpeechRecognitionLanguage = location;
        var audioStreamFormat = AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitRate, (byte)channels);
        var audioInputStream  = AudioInputStream.CreatePushStream(audioStreamFormat);
        var audioConfig       = AudioConfig.FromStreamInput(audioInputStream);
        var recognizer        = new SpeechRecognizer(speechConfig, audioConfig);

        audioInputStream.Write(readBytes, readBytes.Length);

        var result = await recognizer.RecognizeOnceAsync();

        Debug.Log($"Recognized Line : = {result.Text}");
    }
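
One caveat with the snippet above: nothing signals the end of the audio, so RecognizeOnceAsync may wait for further data until a timeout expires. If the byte array is the complete utterance, closing the push stream after the write makes that explicit (a small, assumed refinement):

        // After writing the complete audio, close the stream so the recognizer
        // knows that no further data will arrive.
        audioInputStream.Write(readBytes, readBytes.Length);
        audioInputStream.Close();

        var result = await recognizer.RecognizeOnceAsync();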
Example #27
        public static AudioConfig DownloadWavFile(BinaryReader reader)
        {
            // Tag "RIFF"
            char[] data = new char[4];
            reader.Read(data, 0, 4);

            // Chunk size
            long fileSize = reader.ReadInt32();

            // Subchunk, Wave Header
            // Subchunk, Format
            // Tag: "WAVE"
            reader.Read(data, 0, 4);

            // Tag: "fmt"
            reader.Read(data, 0, 4);

            // chunk format size
            var formatSize       = reader.ReadInt32();
            var formatTag        = reader.ReadUInt16();
            var channels         = reader.ReadUInt16();
            var samplesPerSecond = reader.ReadUInt32();
            var avgBytesPerSec   = reader.ReadUInt32();
            var blockAlign       = reader.ReadUInt16();
            var bitsPerSample    = reader.ReadUInt16();

            // Until now we have read 16 bytes in format, the rest is cbSize and is ignored for now.
            if (formatSize > 16)
            {
                reader.ReadBytes(formatSize - 16);
            }

            // Second Chunk, data
            // tag: data.
            reader.Read(data, 0, 4);

            // data chunk size
            int dataSize = reader.ReadInt32();

            // now, we have the format in the format parameter and the
            // reader set to the start of the body, i.e., the raw sample data
            AudioStreamFormat format = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, (byte)bitsPerSample, (byte)channels);

            return(AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format));
        }
Example #28
        public async Task <ActionResult <IEnumerable <SpeechAnalisys> > > RecognizeSpeechAsync(IFormFile file)
        {
            if (file == null || file.Length == 0)
            {
                return(BadRequest("Um arquivo de audio é necessário!"));
            }
            if (file.ContentType != "audio/wav")
            {
                return(BadRequest("Permitido somente formato wav!"));
            }

            var config          = SpeechConfig.FromSubscription(SUBSCRIPTION_KEY, SUBSCRIPTION_REGION);
            var result          = new List <SpeechAnalisys>();
            var stopRecognition = new TaskCompletionSource <int>();

            using (var audioInput = AudioConfig.FromStreamInput(new PullAudioInputStream(new BinaryAudioStreamReader(
                                                                                             new BinaryReader(file.OpenReadStream())))))
            {
                using (var recognizer = new SpeechRecognizer(config, audioInput))
                {
                    recognizer.Recognized += (s, e) =>
                    {
                        if (e.Result.Reason == ResultReason.RecognizedSpeech)
                        {
                            result.Add(new SpeechAnalisys {
                                Sentence = e.Result.Text
                            });
                        }
                    };

                    recognizer.Canceled += (s, e) => stopRecognition.TrySetResult(0);

                    recognizer.SessionStopped += (s, e) => stopRecognition.TrySetResult(0);

                    await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                    Task.WaitAny(new[] { stopRecognition.Task });

                    await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
                }
            }

            return(Ok(result));
        }
Example #29
        private void Init(string from, string to)
        {
            this.toLanguage = to;

            Profile       = MediaEncodingProfile.CreateWav(AudioEncodingQuality.Low);
            Profile.Audio = AudioEncodingProperties.CreatePcm(16000, 1, 16);

            byte channels         = 1;
            byte bitsPerSample    = 16;
            uint samplesPerSecond = 16000; // or 8000
            var  audioFormat      = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);

            // Init Push Stream

            pushStream = AudioInputStream.CreatePushStream(audioFormat);

            if (from == to)
            {
                var config = SpeechConfig.FromSubscription(apiKey, region);
                config.SpeechRecognitionLanguage = from;

                speechRecognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(pushStream));

                speechRecognizer.Recognizing += RecognisingSpeechHandler;
                speechRecognizer.Recognized  += RecognisingSpeechHandler;

                speechRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
                speechRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();
            }
            else
            {
                var config = SpeechTranslationConfig.FromSubscription(apiKey, region);
                config.SpeechRecognitionLanguage = from;
                config.AddTargetLanguage(to);

                translationRecognizer = new TranslationRecognizer(config, AudioConfig.FromStreamInput(pushStream));

                translationRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
                translationRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();

                translationRecognizer.Recognizing += RecognisingTranslationHandler;
                translationRecognizer.Recognized  += RecognisingTranslationHandler;
            }
        }
Example #30
        async static Task FromStream(SpeechConfig speechConfig)
        {
            var reader = new BinaryReader(File.OpenRead(DEMO_FILE));

            Console.WriteLine(reader.ToString());
            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var audioConfig      = AudioConfig.FromStreamInput(audioInputStream);
            using var recognizer       = new SpeechRecognizer(speechConfig, audioConfig);

            byte[] readBytes;
            do
            {
                readBytes = reader.ReadBytes(1024);
                audioInputStream.Write(readBytes, readBytes.Length);
            } while (readBytes.Length > 0);

            var result = await recognizer.RecognizeOnceAsync();

            Console.WriteLine($"RECOGNIZED: Text={result.Text}");
        }
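
A possible entry point for the snippet above; the key, region, and language are placeholders:

        static async Task Main()
        {
            // Placeholder credentials; substitute your own Speech resource key and region.
            var speechConfig = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
            speechConfig.SpeechRecognitionLanguage = "en-US";

            await FromStream(speechConfig);
        }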