private void SetupTranscriptionAndTranslationService()
        {
            try
            {
                var lCognitiveKey    = _settings.AzureCognitiveKey;
                var lCognitiveRegion = _settings.AzureCognitiveRegion;

                _eventPublisher.Publish("MySTT Setup", $"Got region: {lCognitiveRegion}, key starting from: {lCognitiveKey??lCognitiveKey.Substring(0, lCognitiveKey.Length /2)}");

                this.mTransSpeechConfig = SpeechTranslationConfig.FromSubscription(lCognitiveKey, lCognitiveRegion);
                var fromLanguage = "en-US";
                var toLanguages  = new List <string> {
                    "el-GR"
                };
                //var toLanguages = new List<string> { "ru-RU" };
                this.mTransSpeechConfig.SpeechRecognitionLanguage = fromLanguage;
                toLanguages.ForEach(this.mTransSpeechConfig.AddTargetLanguage);
                this.mInputStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(SAMPLESPERSECOND, BITSPERSAMPLE, NUMBEROFCHANNELS));

                this.mAudioConfig           = AudioConfig.FromStreamInput(this.mInputStream);
                this.mTranslationRecognizer = new TranslationRecognizer(this.mTransSpeechConfig, this.mAudioConfig);

                this.mTranslationRecognizer.Recognizing       += this.MSpeechRecognizer_Recognizing;
                this.mTranslationRecognizer.Recognized        += this.MSpeechRecognizer_Recognized;
                this.mTranslationRecognizer.SpeechEndDetected += this.MSpeechRecognizer_SpeechEndDetected;

                this.StartRecognisionIfNeeded();
            }
            catch (Exception ex)
            {
                _eventPublisher.Publish("MySTT Setup - Failed", $"Failed to initialize: {ex.Message}");
            }
        }
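
The setup above only wires the push stream into the recognizer; the audio itself still has to be written into mInputStream from whatever capture callback the host application uses. A minimal sketch of that feed path, assuming a hypothetical OnAudioFrame callback that delivers PCM matching SAMPLESPERSECOND/BITSPERSAMPLE/NUMBEROFCHANNELS:

        // Hypothetical feed method (not part of the original class): each captured PCM
        // frame is written into the push stream the recognizer was configured with.
        private void OnAudioFrame(byte[] pcmBuffer, int bytesRecorded)
        {
            this.mInputStream.Write(pcmBuffer, bytesRecorded);
        }

        // When capture stops, closing the stream signals end-of-audio to the recognizer.
        private void OnCaptureStopped()
        {
            this.mInputStream.Close();
        }
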
Example #2
        //private const string speechEndpoint = "https://YOUR_LOCATION.api.cognitive.microsoft.com/";

        //public async Task<IActionResult> OnGetAsync()
        //{
        //    return Page();
        //}

        public async Task <IActionResult> OnPostAsync()
        {
            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechLocation);

            speechConfig.SpeechRecognitionLanguage = "ja-JP";

            byte[] readBytes;

            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var reader           = new BinaryReader(VoiceFile.OpenReadStream());
            do
            {
                readBytes = reader.ReadBytes(1024);
                audioInputStream.Write(readBytes, readBytes.Length);
            } while (readBytes.Length > 0);

            var audioConfig = AudioConfig.FromStreamInput(audioInputStream);

            using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            var result = await speechRecognizer.RecognizeOnceAsync();

            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                Result         = "結果:"; // "Result:"
                RecognizedText = result.Text;
            }

            return(Page());
        }
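
One detail worth flagging about OnPostAsync (an observation, not part of the original sample): the push stream is never closed after the read loop, so the recognizer only learns that the audio has ended via its silence timeout. Closing it explicitly after the do/while loop is a one-line addition:

            audioInputStream.Close(); // signals end-of-audio before RecognizeOnceAsync
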
        public async Task <string> DetectLanguage(byte[] audioBytes, string fileExtension, string locale1, string locale2)
        {
            var wavBytes = ConvertToWaveBytes(audioBytes, fileExtension);

            var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new string[] { locale1, locale2 });

            var config          = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
            var stopRecognition = new TaskCompletionSource <int>();
            var detected        = new List <string>();

            using var pushStream = AudioInputStream.CreatePushStream();
            using (var audioInput = AudioConfig.FromStreamInput(pushStream))
            {
                using var recognizer = new SpeechRecognizer(
                          config,
                          autoDetectSourceLanguageConfig,
                          audioInput);
                pushStream.Write(wavBytes);
                pushStream.Close();

                recognizer.Recognized += (s, e) =>
                {
                    var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
                    var detectedLanguage = autoDetectSourceLanguageResult.Language;
                    detected.Add(detectedLanguage);
                    if (detected.Count > UtteranceCount)
                    {
                        stopRecognition.TrySetResult(0);
                    }
                };

                recognizer.SessionStopped += (s, e) =>
                {
                    stopRecognition.TrySetResult(0);
                };

                await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                var t = Task.Factory.StartNew(async() => { await SetTimeOutForRecognition(stopRecognition).ConfigureAwait(false); }, CancellationToken.None, TaskCreationOptions.None, TaskScheduler.Default);

                Task.WaitAny(new[] { stopRecognition.Task });

                await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
            }

            if (detected.Count == 0)
            {
                throw new TimeoutException("Did not get any language identification results back in time.");
            }

            var detectedByCount = detected.GroupBy(i => i);
            var mostFreq        = detectedByCount.OrderBy(t => t.Count()).LastOrDefault().Key;

            if (string.IsNullOrEmpty(mostFreq) || (!mostFreq.Equals(locale1, StringComparison.OrdinalIgnoreCase) && !mostFreq.Equals(locale2, StringComparison.OrdinalIgnoreCase)))
            {
                return(locale1);
            }

            return(mostFreq);
        }
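
SetTimeOutForRecognition is referenced above but not shown. A plausible implementation (an assumption, not the original helper) simply completes the TaskCompletionSource after a fixed delay so a silent or unrecognizable stream cannot hang DetectLanguage:

        // Assumed helper: unblocks DetectLanguage if no result arrives within the timeout.
        private static async Task SetTimeOutForRecognition(TaskCompletionSource<int> stopRecognition)
        {
            await Task.Delay(TimeSpan.FromSeconds(30)).ConfigureAwait(false); // assumed timeout
            stopRecognition.TrySetResult(0);
        }
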
Example #4
        private static async Task <string> UploadAudioAndStartRemoteTranscription(string key, string region)
        {
            AudioStreamFormat audioStreamFormat;

            var config = SpeechConfig.FromSubscription(key, region);

            config.SetProperty("ConversationTranscriptionInRoomAndOnline", "true");
            config.SetServiceProperty("transcriptionMode", "RealTimeAndAsync", ServicePropertyChannel.UriQueryParameter);
            var waveFilePullStream = OpenWavFile(@"katiesteve.wav", out audioStreamFormat);
            var audioInput         = AudioConfig.FromStreamInput(AudioInputStream.CreatePullStream(waveFilePullStream, audioStreamFormat));

            var meetingId = Guid.NewGuid().ToString();

            using (var conversation = await Conversation.CreateConversationAsync(config, meetingId))
            {
                using (var conversationTranscriber = TrackSessionId(new ConversationTranscriber(audioInput)))
                {
                    await conversationTranscriber.JoinConversationAsync(conversation);

                    await conversation.AddParticipantAsync("OneUserByUserId");

                    var user = User.FromUserId("CreateUserFromId and then add it");
                    await conversation.AddParticipantAsync(user);

                    var result = await GetRecognizerResult(conversationTranscriber, meetingId);
                }
            }
            return(meetingId);
        }
Example #5
        /// <summary>
        /// Returns speech-to-text results for the selected Opus audio file, streamed from a blob container in Azure Storage.
        /// </summary>
        /// <param name="opusBlob">Name of the Opus file</param>
        /// <param name="container">Azure blob container name</param>
        /// <returns>List of <see cref="Speech"/> results</returns>
        public async Task <List <Speech> > RunRecognitionAsync(string opusBlob, string container)
        {
            SpeechResult = new List <Speech>();

            var blobService = new BlobService();
            var blobClient  = await blobService.GetBlobFromContainerAsync(opusBlob, container);

            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var audioConfig      = AudioConfig.FromStreamInput(audioInputStream);
            using (var recognizer = new SpeechRecognizer(_speechConfig, _languagesToDetect, audioConfig))
            {
                recognizer.Recognizing    += Recognizing;
                recognizer.Recognized     += Recognized;
                recognizer.SessionStarted += SessionStarted;
                recognizer.SessionStopped += SessionStopped;
                recognizer.Canceled       += SessionCanceled;

                await InjectStreamIntoRecognizerAsync(audioInputStream, blobClient);

                await recognizer.StartContinuousRecognitionAsync();

                Task.WaitAny(new[] { _stopRecognition.Task });
                await recognizer.StopContinuousRecognitionAsync();
            }

            return(SpeechResult);
        }
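
InjectStreamIntoRecognizerAsync is not included in this listing. A rough sketch of what it likely does, assuming an Azure.Storage.Blobs BlobClient and a hypothetical DecodeOpusToPcm step (the push stream expects PCM, not raw Opus):

        // Assumed shape of the helper used above: pull the blob, decode to PCM,
        // push the bytes into the recognizer's input stream, then signal end-of-audio.
        private static async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobClient)
        {
            using var blobStream = await blobClient.OpenReadAsync();
            using var pcmStream  = DecodeOpusToPcm(blobStream); // hypothetical decode step

            var buffer = new byte[4096];
            int read;
            while ((read = await pcmStream.ReadAsync(buffer, 0, buffer.Length)) > 0)
            {
                audioInputStream.Write(buffer, read);
            }
            audioInputStream.Close();
        }
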
        /// <summary>
        /// Remote audio transcription of the given audioFile with Azure Cognitive Services.
        /// </summary>
        public static AnalysisResult TranscribeAudio(ref AnalysisResult audioResponse, IFormFile audioFile)
        {
            // needed for speaker diarization to resolve at the word level
            SPEECH_CONFIG.RequestWordLevelTimestamps();

            var audioFormat128 = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);
            var audioFormat256 = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);

            // load bytestream -> audio stream
            // load audio config from audio stream
            // initialize speech recognizer
            using (var br = new BinaryReader(audioFile.OpenReadStream()))
                using (var audioInputStream = AudioInputStream.CreatePushStream(audioFormat128))
                    using (var audioConfig = AudioConfig.FromStreamInput(audioInputStream))
                        using (var recognizer = new SpeechRecognizer(SPEECH_CONFIG, audioConfig))
                        {
                            long nbytes = audioFile.Length;
                            var  buff   = new List <byte>();

                            // read through bytes of audio
                            byte[] readBytes;
                            do
                            {
                                readBytes = br.ReadBytes(1024);
                                buff.AddRange(readBytes);
                                audioInputStream.Write(readBytes, readBytes.Length);
                            } while (readBytes.Length > 0);

                            var transcript = ExecuteRecognizer(recognizer).Result;
                            audioResponse.Transcript = transcript;
                            return(audioResponse);
                        }
        }
Example #7
        private async Task <PushAudioInputStream> CreatePushStreamAsync(Stream stream)
        {
            var read = 0;
            var recognitionStream = AudioInputStream.CreatePushStream();
            var buffer            = ArrayPool <byte> .Shared.Rent(80000);

            var sumRead = 0;

            try
            {
                while ((read = await stream.ReadAsync(buffer, 0, buffer.Length)) != 0)
                {
                    sumRead += read;
                    recognitionStream.Write(buffer, read);
                }
                recognitionStream.Close();
                if (sumRead == 0)
                {
                    return(null);
                }
                return(recognitionStream);
            }
            finally
            {
                ArrayPool <byte> .Shared.Return(buffer);
            }
        }
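
A hedged usage sketch for CreatePushStreamAsync (caller name assumed): the returned stream goes straight into AudioConfig.FromStreamInput and then into a recognizer.

        // Hypothetical caller, not part of the original class.
        private async Task<string> RecognizeFromStreamAsync(Stream stream, SpeechConfig speechConfig)
        {
            using var pushStream = await CreatePushStreamAsync(stream);
            if (pushStream == null)
            {
                return null; // nothing was read from the source stream
            }

            using var audioConfig = AudioConfig.FromStreamInput(pushStream);
            using var recognizer  = new SpeechRecognizer(speechConfig, audioConfig);

            var result = await recognizer.RecognizeOnceAsync();
            return result.Text;
        }
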
Example #8
        protected internal virtual void writeFile(string wavName)
        {
            AudioFormat audioFormat = new AudioFormat((float)this.sampleRate, this.bitsPerSample, 1, this.isSigned, true);

            AudioFileFormat.Type targetType = WavWriter.getTargetType("wav");
            byte[] array = this.baos.toByteArray();
            ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(array);
            InputStream          inputStream          = byteArrayInputStream;
            AudioFormat          audioFormat2         = audioFormat;
            int num       = array.Length;
            int frameSize = audioFormat.getFrameSize();
            AudioInputStream audioInputStream = new AudioInputStream(inputStream, audioFormat2, (long)((frameSize != -1) ? (num / frameSize) : (-(long)num)));
            File             file             = new File(wavName);

            if (AudioSystem.isFileTypeSupported(targetType, audioInputStream))
            {
                try
                {
                    AudioSystem.write(audioInputStream, targetType, file);
                }
                catch (IOException ex)
                {
                    Throwable.instancehelper_printStackTrace(ex);
                }
            }
        }
Example #9
 public AudioData(AudioInputStream ais)
 {
     this.__listeners    = new ArrayList();
     this.selectionStart = -1;
     this.selectionEnd   = -1;
     this.shorts         = Utils.toSignedPCM(ais);
     this.format         = new AudioFormat(ais.getFormat().getSampleRate(), 16, 1, true, false);
 }
Example #10
        public virtual void save(string fileName, AudioFileFormat.Type fileFormat)
        {
            File file = new File(fileName);

            byte[]           audio            = this.getAudio();
            AudioInputStream audioInputStream = new AudioInputStream(new ByteArrayInputStream(audio), this.getAudioFormat(), (long)audio.Length);

            AudioSystem.write(audioInputStream, fileFormat, file);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="ContinuousSpeechRecognizer"/> class.
        /// </summary>
        /// <param name="pipeline">The pipeline in which to create the component.</param>
        /// <param name="subscriptionKey">The subscription key for the Azure speech resource.</param>
        /// <param name="region">The service region of the Azure speech resource.</param>
        public ContinuousSpeechRecognizer(Pipeline pipeline, string subscriptionKey, string region)
            : base(pipeline)
        {
            var config = SpeechConfig.FromSubscription(subscriptionKey, region);

            this.pushStream = AudioInputStream.CreatePushStream();
            this.audioInput = AudioConfig.FromStreamInput(this.pushStream);
            this.recognizer = new SpeechRecognizer(config, this.audioInput);
        }
            public AudioInputStream nextElement()
            {
                AudioInputStream result = null;

                if (this.lastFile == null)
                {
                    ConcatAudioFileDataSource.access_002(this.this_0, this.readNext());
                }
                if (ConcatAudioFileDataSource.access_000(this.this_0) != null)
                {
                    try
                    {
                        try
                        {
                            AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(ConcatAudioFileDataSource.access_000(this.this_0));
                            AudioFormat      format           = audioInputStream.getFormat();
                            if (!ConcatAudioFileDataSource.access_100(this.this_0))
                            {
                                ConcatAudioFileDataSource.access_102(this.this_0, true);
                                this.this_0.bigEndian     = format.isBigEndian();
                                this.this_0.sampleRate    = ByteCodeHelper.f2i(format.getSampleRate());
                                this.this_0.signedData    = format.getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED);
                                this.this_0.bytesPerValue = format.getSampleSizeInBits() / 8;
                            }
                            if (format.getSampleRate() != (float)this.this_0.sampleRate || format.getChannels() != 1 || format.isBigEndian() != this.this_0.bigEndian)
                            {
                                string text = "format mismatch for subsequent files";

                                throw new RuntimeException(text);
                            }
                            result = audioInputStream;
                            ConcatAudioFileDataSource.access_200(this.this_0).finer(new StringBuilder().append("Starting processing of '").append(this.lastFile.getFile()).append('\'').toString());
                            Iterator iterator = this.this_0.__fileListeners.iterator();
                            while (iterator.hasNext())
                            {
                                AudioFileProcessListener audioFileProcessListener  = (AudioFileProcessListener)iterator.next();
                                AudioFileProcessListener audioFileProcessListener2 = audioFileProcessListener;
                                audioFileProcessListener2.audioFileProcStarted(new File(ConcatAudioFileDataSource.access_000(this.this_0).getFile()));
                            }
                            this.lastFile = ConcatAudioFileDataSource.access_000(this.this_0);
                            ConcatAudioFileDataSource.access_002(this.this_0, null);
                        }
                        catch (IOException ex)
                        {
                            Throwable.instancehelper_printStackTrace(ex);

                            throw new Error(new StringBuilder().append("Cannot convert ").append(ConcatAudioFileDataSource.access_000(this.this_0)).append(" to a FileInputStream").toString());
                        }
                    }
                    catch (UnsupportedAudioFileException ex3)
                    {
                        Throwable.instancehelper_printStackTrace(ex3);
                    }
                    return(result);
                }
                return(result);
            }
Example #13
        public AudioSegment(byte[] audioData, long startOffset, long endOffset,
                            uint sampleRate = SAMPLE_RATE, byte bitsPerSample = BITS_PER_SAMPLE, byte channels = CHANNELS)
        {
            MemoryStream      tempStream   = new MemoryStream(audioData);
            AudioStreamFormat streamFormat = AudioStreamFormat.GetWaveFormatPCM(sampleRate, bitsPerSample, channels);

            AudioStream = AudioInputStream.CreatePullStream(new BinaryAudioStreamReader(tempStream), streamFormat);

            AudioData   = audioData;
            StartOffset = startOffset;
            EndOffset   = endOffset;
        }
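
BinaryAudioStreamReader is referenced above but not part of this listing. The version in the Speech SDK samples is a thin PullAudioInputStreamCallback over a .NET Stream; a sketch along those lines (assumed to match, not copied from the original project):

        // Pull-stream callback that serves bytes from an in-memory stream to the SDK.
        public sealed class BinaryAudioStreamReader : PullAudioInputStreamCallback
        {
            private readonly BinaryReader _reader;

            public BinaryAudioStreamReader(Stream stream)
            {
                _reader = new BinaryReader(stream);
            }

            // Returning 0 signals end-of-audio to the recognizer.
            public override int Read(byte[] dataBuffer, uint size)
            {
                return _reader.Read(dataBuffer, 0, (int)size);
            }

            protected override void Dispose(bool disposing)
            {
                if (disposing)
                {
                    _reader.Dispose();
                }
                base.Dispose(disposing);
            }
        }
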
Example #14
        void DisconnectAudioStream()
        {
            if (_microphoneStream != null)
            {
                // Destroy AudioPreprocessorPlaybackListener
                if (_audioPreprocessorPlaybackListener != null)
                {
                    Destroy(_audioPreprocessorPlaybackListener);
                    _audioPreprocessorPlaybackListener = null;
                }

                // Dispose of audio preprocessor
                if (_audioPreprocessor != null)
                {
                    _audioPreprocessor.Dispose();
                    _audioPreprocessor = null;
                }

                // Close microphone stream
                _microphoneStream.Close();

                // Dispose microphone device
                if (_oculusMicrophoneDevice != null)
                {
                    _oculusMicrophoneDevice.Stop();
                    _oculusMicrophoneDevice.Dispose();
                    _oculusMicrophoneDevice = null;
                }
                if (_nativeMicrophoneDevice != null)
                {
                    _nativeMicrophoneDevice.Stop();
                    _nativeMicrophoneDevice.Dispose();
                    _nativeMicrophoneDevice = null;
                }
                if (_unityMicrophoneDevice != null)
                {
                    _unityMicrophoneDevice.Dispose();
                    _unityMicrophoneDevice = null;
                }

                // Clean up
                _unityMicrophoneDeviceDataReader = null;
                _microphoneStream = null;
            }

            // Remove audio output
            if (_audioOutput != null)
            {
                _audioOutput.Stop();
                Destroy(_audioOutput);
                _audioOutput = null;
            }
        }
        /// <summary>
        /// Constructs an <see cref="AudioConfig"/> from <see cref="Config"/>.
        /// Depending on the available services, this may either use the audio features built into the Speech SDK (such as <see cref="AudioConfig.FromDefaultMicrophoneInput"/>),
        /// or it may construct a <see cref="IStreamAudioSource"/> that accesses the requested <see cref="AudioDevice"/> with resampling and noise gates as required.
        /// </summary>
        /// <returns>The configured <see cref="AudioConfig"/>, or null if no usable audio input was found.</returns>
        protected AudioConfig GetAudioConfig()
        {
            var streamSource = GetStreamAudioSource(Config.AudioSource);

            if (streamSource != null)
            {
                //use this stream source and convert to an Azure audio stream
                try
                {
                    var azureInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(
                                                                           (uint)streamSource.Format.SampleRate,
                                                                           (byte)streamSource.Format.BitsPerSample,
                                                                           (byte)streamSource.Format.ChannelCount));

                    byte[] bufferOptional = null;
                    streamSource.DataAvailable += (s, e) =>
                    {
                        azureInput.Write(e.Buffer.GetArray(ref bufferOptional), e.Buffer.Count);
                    };
                    streamSource.Stopped += (s, e) =>
                    {
                        if (e.Cause == StreamAudioSourceStoppedCause.Stopped)
                        {
                            //signal end-of-stream to Azure
                            azureInput.Close();
                        }
                    };

                    this.StreamAudioSource = streamSource;
                    return(AudioConfig.FromStreamInput(azureInput));
                }
                catch (Exception ex)
                {
                    Logger.LogError(ex, $"Error while creating an Azure AudioConfig from an IStreamAudioSource. Format: SampleRate={streamSource.Format.SampleRate}, BitsPerSample={streamSource.Format.BitsPerSample}, Channels={streamSource.Format.ChannelCount}");
                    streamSource.Dispose();
                }
            }

            this.StreamAudioSource    = null;
            this.StreamAudioNoiseGate = null;

            //try and use the built-in audio engine
            if (Config.AudioSource is AudioDevice audioDevice)
            {
                if (audioDevice.UseDefaultAudioInputDevice)
                {
                    return(AudioConfig.FromDefaultMicrophoneInput());
                }
            }

            return(null);
        }
Example #16
    void Start()
    {
        if (outputText == null)
        {
            UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (recoButton == null)
        {
            message = "recoButton property is null! Assign a UI Button to it.";
            UnityEngine.Debug.LogError(message);
        }
        else
        {
            // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#elif PLATFORM_IOS
            if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
            {
                Application.RequestUserAuthorization(UserAuthorization.Microphone);
            }
#else
            micPermissionGranted = true;
            message = "Click button to recognize speech";
#endif

            grabacionCompleta = new StringBuilder(200);

            config = SpeechConfig.FromSubscription("b899f4a3bc2b4b30b3e690476b1af952", "westus");
            config.SpeechRecognitionLanguage = "es-ES";
            pushStream              = AudioInputStream.CreatePushStream();
            audioInput              = AudioConfig.FromStreamInput(pushStream);
            recognizer              = new SpeechRecognizer(config, audioInput);
            recognizer.Recognizing += RecognizingHandler;
            recognizer.Recognized  += RecognizedHandler;
            recognizer.Canceled    += CanceledHandler;

            recoButton.onClick.AddListener(ButtonClick);
            foreach (var device in Microphone.devices)
            {
                Debug.Log("DeviceName: " + device);
            }
            audioSource = GameObject.Find("MyAudioSource").GetComponent <AudioSource>();
        }
    }
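
Nothing in Start() actually pushes microphone samples into pushStream; in the Unity samples this pattern comes from, an Update loop reads new samples from the recording AudioClip and converts them to 16-bit PCM before writing. A rough sketch of that conversion step (method name assumed):

    // Assumed helper: converts a chunk of Unity float samples (-1..1) to 16-bit
    // little-endian PCM and pushes it into the stream the recognizer reads from.
    private void PushAudioChunk(float[] samples, int sampleCount)
    {
        var pcm = new byte[sampleCount * 2];
        for (int i = 0; i < sampleCount; i++)
        {
            short value = (short)Mathf.Clamp(samples[i] * short.MaxValue, short.MinValue, short.MaxValue);
            pcm[i * 2]     = (byte)(value & 0xFF);
            pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }
        pushStream.Write(pcm, pcm.Length);
    }
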
Example #17
        private static void dumpStreamChunk(File file, string text, long num, long num2)
        {
            AudioFileFormat  audioFileFormat  = AudioSystem.getAudioFileFormat(file);
            AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
            AudioFormat      format           = audioFileFormat.getFormat();
            int num3 = Math.round((float)format.getFrameSize() * format.getFrameRate() / 1000f);

            audioInputStream.skip(num * (long)num3);
            AudioInputStream audioInputStream2 = new AudioInputStream(audioInputStream, format, num2 * (long)num3);

            AudioSystem.write(audioInputStream2, audioFileFormat.getType(), new File(text));
            audioInputStream.close();
            audioInputStream2.close();
        }
Example #18
 void ConfigureSpeechRecognizer()
 {
     _speechConfig = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
     _speechConfig.SpeechRecognitionLanguage = "es-US";
     _speechConfig.OutputFormat = OutputFormat.Detailed;
     _pushStream                    = AudioInputStream.CreatePushStream();
     _audioInput                    = AudioConfig.FromStreamInput(_pushStream);
     _speechRecognizer              = new SpeechRecognizer(_speechConfig, _audioInput);
     _speechRecognizer.Recognizing += SpeechRecognizingHandler;
     _speechRecognizer.Recognized  += SpeechRecognizedHandler;
     _speechRecognizer.Canceled    += SpeechCanceledHandler;
     _audioSource                   = GameObject.Find("AudioSource").GetComponent <AudioSource>();
     _audioSource.loop              = false;
     _audioSource.playOnAwake       = false;
 }
Example #19
    void Start()
    {
        if (outputText == null)
        {
            UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (recoButton == null)
        {
            _message = "recoButton property is null! Assign a UI Button to it.";
            UnityEngine.Debug.LogError(_message);
        }
        else
        {
            // Continue with normal initialization, Text and Button objects are present.
#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#elif PLATFORM_IOS
            if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
            {
                Application.RequestUserAuthorization(UserAuthorization.Microphone);
            }
#else
            _micPermissionGranted = true;
            _message = "Click button to recognize speech";
#endif
            _config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
            _config.SpeechRecognitionLanguage = "es-US";
            _config.AddTargetLanguage("en-US");
            _pushStream              = AudioInputStream.CreatePushStream();
            _audioInput              = AudioConfig.FromStreamInput(_pushStream);
            _recognizer              = new TranslationRecognizer(_config, _audioInput);
            _recognizer.Recognizing += RecognizingHandler;
            _recognizer.Recognized  += RecognizedHandler;
            _recognizer.Canceled    += CanceledHandler;

            foreach (var device in Microphone.devices)
            {
                Debug.Log("DeviceName: " + device);
            }
            _audioSource = GameObject.Find("AudioSource").GetComponent <AudioSource>();
        }
    }
Example #20
    public async UniTask STTBytes(byte[] readBytes, int sampleRate, int bitRate, int channels)
    {
        var speechConfig = SpeechConfig.FromSubscription(subscription_key, region);

        speechConfig.SpeechRecognitionLanguage = location;
        var audioStreamFormat = AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitRate, (byte)channels);
        var audioInputStream  = AudioInputStream.CreatePushStream(audioStreamFormat);
        var audioConfig       = AudioConfig.FromStreamInput(audioInputStream);
        var recognizer        = new SpeechRecognizer(speechConfig, audioConfig);

        audioInputStream.Write(readBytes, readBytes.Length);

        var result = await recognizer.RecognizeOnceAsync();

        Debug.Log($"Recognized Line : = {result.Text}");
    }
        public async Task Start()
        {
            var config = SpeechConfig.FromSubscription(_projectSettings.AzureSpeechServiceSubscriptionKey, _projectSettings.AzureSpeechServiceRegionName);

            var audioFormat = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);

            _inputStream = AudioInputStream.CreatePushStream(audioFormat);
            _audioInput  = AudioConfig.FromStreamInput(_inputStream);

            _recognizer = new SpeechRecognizer(config, _audioInput);
            _recognizer.SessionStarted += RecognizerStarted;
            _recognizer.Recognized     += RecognizerRecognized;
            _recognizer.Canceled       += RecognizerCancelled;

            await _recognizer.StartContinuousRecognitionAsync();
        }
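
Start() opens the recognizer over an 8 kHz push stream but does not show where the audio comes from. Hypothetical companions (assumed names, not from the original source): the telephony or capture pipeline would push each 16-bit mono PCM chunk, and Stop() would flush the stream when the call ends.

        public void WriteAudio(byte[] chunk)
        {
            _inputStream.Write(chunk, chunk.Length);
        }

        public async Task Stop()
        {
            _inputStream.Close();                               // signal end-of-audio
            await _recognizer.StopContinuousRecognitionAsync(); // then stop recognition
        }
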
Example #22
        private void Init(string from, string to)
        {
            this.toLanguage = to;

            Profile       = MediaEncodingProfile.CreateWav(AudioEncodingQuality.Low);
            Profile.Audio = AudioEncodingProperties.CreatePcm(16000, 1, 16);

            byte channels         = 1;
            byte bitsPerSample    = 16;
            uint samplesPerSecond = 16000; // or 8000
            var  audioFormat      = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);

            // Init Push Stream

            pushStream = AudioInputStream.CreatePushStream(audioFormat);

            if (from == to)
            {
                var config = SpeechConfig.FromSubscription(apiKey, region);
                config.SpeechRecognitionLanguage = from;

                speechRecognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(pushStream));

                speechRecognizer.Recognizing += RecognisingSpeechHandler;
                speechRecognizer.Recognized  += RecognisingSpeechHandler;

                speechRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
                speechRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();
            }
            else
            {
                var config = SpeechTranslationConfig.FromSubscription(apiKey, region);
                config.SpeechRecognitionLanguage = from;
                config.AddTargetLanguage(to);

                translationRecognizer = new TranslationRecognizer(config, AudioConfig.FromStreamInput(pushStream));

                translationRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
                translationRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();

                translationRecognizer.Recognizing += RecognisingTranslationHandler;
                translationRecognizer.Recognized  += RecognisingTranslationHandler;
            }
        }
Example #23
        public static short[] toSignedPCM(AudioInputStream ais)
        {
            AudioFormat format    = ais.getFormat();
            int         num       = ais.available();
            int         frameSize = format.getFrameSize();

            short[] array  = new short[(frameSize != -1) ? (num / frameSize) : (-num)];
            byte[]  array2 = new byte[format.getFrameSize()];
            int     num2   = 0;

            while (ais.read(array2) != -1)
            {
                short[] array3 = array;
                int     num3   = num2;
                num2++;
                array3[num3] = Utils.bytesToShort(format, array2);
            }
            return(array);
        }
Example #24
        async static Task FromStream(SpeechConfig speechConfig)
        {
            var reader = new BinaryReader(File.OpenRead(DEMO_FILE));

            Console.WriteLine(reader.ToString());
            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var audioConfig      = AudioConfig.FromStreamInput(audioInputStream);
            using var recognizer       = new SpeechRecognizer(speechConfig, audioConfig);

            byte[] readBytes;
            do
            {
                readBytes = reader.ReadBytes(1024);
                audioInputStream.Write(readBytes, readBytes.Length);
            } while (readBytes.Length > 0);

            var result = await recognizer.RecognizeOnceAsync();

            Console.WriteLine($"RECOGNIZED: Text={result.Text}");
        }
        public AzureSpeechRecognizer(string key, string region, WaveStream stream)
        {
            var speechConfig = SpeechConfig.FromSubscription(key, region);

            this.stream     = NormalizeStream(stream);
            this.pushStream = AudioInputStream.CreatePushStream();
            this.recognizer = new SpeechRecognizer(speechConfig, AudioConfig.FromStreamInput(this.pushStream));
            this.resultId   = Guid.NewGuid().ToString();
            this.lockObj    = new object();

            this.recognizer.Recognized += (snd, evt) =>
            {
                string id = null;
                lock (this.lockObj)
                {
                    id            = this.resultId;
                    this.resultId = Guid.NewGuid().ToString();
                }

                if (!string.IsNullOrWhiteSpace(evt.Result.Text))
                {
                    this.SpeechRecognized?.Invoke(this, new RecognitionEventArgs(evt, id));
                }
            };

            this.recognizer.Recognizing += (snd, evt) =>
            {
                string id = null;
                lock (this.lockObj)
                {
                    id = this.resultId;
                }

                this.SpeechPredicted?.Invoke(this, new RecognitionEventArgs(evt, id));
            };

            this.recognizer.Canceled += (snd, evt) =>
            {
                Debug.WriteLine("lost recognizer");
            };
        }
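
The constructor wires everything up, but nothing in this listing pumps the normalized WaveStream into pushStream. A plausible companion method (assumed, not part of the original class) would look roughly like this:

        // Assumed pump: start continuous recognition, stream the WAV bytes into the
        // push stream, then close it so the recognizer sees end-of-audio.
        public async Task StartAsync()
        {
            await this.recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            var buffer = new byte[4096];
            int read;
            while ((read = this.stream.Read(buffer, 0, buffer.Length)) > 0)
            {
                this.pushStream.Write(buffer, read);
            }

            this.pushStream.Close();
        }
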
Example #26
        public static AudioData readAudioFile(string filename)
        {
            AudioData result;

            try
            {
                BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(filename));
                AudioInputStream    audioInputStream    = AudioSystem.getAudioInputStream(bufferedInputStream);
                AudioData           audioData           = new AudioData(audioInputStream);
                bufferedInputStream.close();
                result = audioData;
            }
            catch (UnsupportedAudioFileException)
            {
                return(null);
            }
            return(result);
        }
Example #27
        /// <summary>Speech-to-text: recognize from an in-memory stream.</summary>
        public static async Task <string> RecognizeFromStreamAsync(string inputFileName)
        {
            var config = SpeechConfig.FromSubscription(subscriptionKey, region);

            var reader = new BinaryReader(File.OpenRead(inputFileName));

            using var audioInputStream = AudioInputStream.CreatePushStream();
            using var audioConfig      = AudioConfig.FromStreamInput(audioInputStream);
            using var recognizer       = new SpeechRecognizer(config, audioConfig);

            byte[] readBytes;
            do
            {
                readBytes = reader.ReadBytes(1024);
                audioInputStream.Write(readBytes, readBytes.Length);
            } while (readBytes.Length > 0);

            var result = await recognizer.RecognizeOnceAsync();

            return(result.Text);
        }
        public virtual void setInputStream(AudioInputStream inputStream, string streamName)
        {
            this.dataStream       = inputStream;
            this.streamEndReached = false;
            this.utteranceEndSent = false;
            this.utteranceStarted = false;
            AudioFormat format = inputStream.getFormat();

            this.sampleRate = ByteCodeHelper.f2i(format.getSampleRate());
            this.bigEndian  = format.isBigEndian();
            string text = format.toString();

            this.logger.finer(new StringBuilder().append("input format is ").append(text).toString());
            int sampleSizeInBits = format.getSampleSizeInBits();

            if (sampleSizeInBits % 8 != 0)
            {
                string text2 = "StreamDataSource: bits per sample must be a multiple of 8.";

                throw new Error(text2);
            }
            this.bytesPerValue = format.getSampleSizeInBits() / 8;
            AudioFormat.Encoding encoding = format.getEncoding();
            if (encoding.equals(AudioFormat.Encoding.PCM_SIGNED))
            {
                this.signedData = true;
            }
            else
            {
                if (!encoding.equals(AudioFormat.Encoding.PCM_UNSIGNED))
                {
                    string text3 = "used file encoding is not supported";

                    throw new RuntimeException(text3);
                }
                this.signedData = false;
            }
            this.totalValuesRead = 0L;
        }
        /// <summary>
        /// Creates a recognizer with the baseline model and the selected language:
        /// creates a config with the subscription key and the selected region,
        /// feeds the supplied channel bytes through a push audio stream,
        /// and waits on RunRecognizer.
        /// </summary>
        private async Task CreateRecognizer(byte[] channel)
        {
            // TODO: support specifying a different region.
            var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);

            config.SpeechRecognitionLanguage = this.RecognitionLanguage;
            config.OutputFormat = OutputFormat.Detailed;

            SpeechRecognizer basicRecognizer;

            PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();

            pushStream.Write(channel);
            pushStream.Close();
            using (var audioInput = AudioConfig.FromStreamInput(pushStream))
            {
                using (basicRecognizer = new SpeechRecognizer(config, audioInput))
                {
                    await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
                }
            }
        }
Example #30
        public async Task <string> AudioToTextAsync(byte[] pcm)
        {
            var guid = Guid.NewGuid();

            if (!Text.ContainsKey(guid))
            {
                Text[guid] = null;
            }

            // Build out the speech recognizer
            using (var pushStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetDefaultInputFormat()))
                using (var audioInput = AudioConfig.FromStreamInput(pushStream))
                    using (var recognizer = new SpeechRecognizer(SpeechConfig, audioInput))
                    {
                        // Subscribe to speech recognizer events.
                        recognizer.SessionStarted += OnSpeechRecognitionSessionStarted;
                        recognizer.Recognizing    += OnSpeechRecognizing;
                        recognizer.Recognized     += (s, e) => OnSpeechRecognized(s, e, guid);
                        recognizer.Canceled       += OnSpeechCanceled;
                        recognizer.SessionStopped += OnSpeechRecognitionSessionStopped;

                        // Start continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
                        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

                        // Send the pcm data to the speech recognizer
                        pushStream.Write(pcm);
                        pushStream.Close();

                        // Wait for completion.
                        // Use Task.WaitAny to keep the task rooted.
                        Task.WaitAny(StopRecognition.Task);

                        // Stop recognition.
                        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);

                        return(Text[guid]);
                    }
        }
 internal TargetDataLineInputStream(AudioInputStream _enclosing, TargetDataLine
     line)
     : base()
 {
     this._enclosing = _enclosing;
     this.line = line;
 }
 /// <summary>
 /// Indicates whether an audio file of the type specified can be written
 /// from the audio input stream indicated.
 /// </summary>
 /// <remarks>
 /// Indicates whether an audio file of the type specified can be written
 /// from the audio input stream indicated.
 /// </remarks>
 /// <param name="fileType">file type for which write capabilities are queried</param>
 /// <param name="stream">for which file writing support is queried</param>
 /// <returns>
 /// <code>true</code> if the file type is supported for this audio input stream,
 /// otherwise <code>false</code>
 /// </returns>
 public virtual bool isFileTypeSupported(AudioFileFormat.Type fileType, AudioInputStream
     stream)
 {
     AudioFileFormat.Type[] types = getAudioFileTypes(stream);
     for (int i = 0; i < types.Length; i++)
     {
         if (fileType.Equals(types[i]))
         {
             return true;
         }
     }
     return false;
 }
 /// <summary>
 /// Obtains the file types that this audio file writer can write from the
 /// audio input stream specified.
 /// </summary>
 /// <remarks>
 /// Obtains the file types that this audio file writer can write from the
 /// audio input stream specified.
 /// </remarks>
 /// <param name="stream">
 /// the audio input stream for which audio file type support
 /// is queried
 /// </param>
 /// <returns>
 /// array of file types.  If no file types are supported,
 /// an array of length 0 is returned.
 /// </returns>
 public abstract AudioFileFormat.Type[] getAudioFileTypes(AudioInputStream
     stream);
 /// <summary>
 /// Writes a stream of bytes representing an audio file of the file format
 /// indicated to the external file provided.
 /// </summary>
 /// <remarks>
 /// Writes a stream of bytes representing an audio file of the file format
 /// indicated to the external file provided.
 /// </remarks>
 /// <param name="stream">
 /// the audio input stream containing audio data to be
 /// written to the file
 /// </param>
 /// <param name="fileType">file type to be written to the file</param>
 /// <param name="out">external file to which the file data should be written</param>
 /// <returns>the number of bytes written to the file</returns>
 /// <exception cref="System.IO.IOException">if an I/O exception occurs</exception>
 /// <exception cref="System.ArgumentException">
 /// if the file format is not supported by
 /// the system
 /// </exception>
 /// <seealso cref="isFileTypeSupported(Type)">isFileTypeSupported(Type)</seealso>
 /// <seealso cref="getAudioFileTypes()">getAudioFileTypes()</seealso>
 public abstract int write(AudioInputStream stream, AudioFileFormat.Type
     fileType, java.io.File @out);
 /// <summary>
 /// Obtains an audio input stream with the specified encoding from the given audio
 /// input stream.
 /// </summary>
 /// <remarks>
 /// Obtains an audio input stream with the specified encoding from the given audio
 /// input stream.
 /// </remarks>
 /// <param name="targetEncoding">desired encoding of the stream after processing</param>
 /// <param name="sourceStream">stream from which data to be processed should be read</param>
 /// <returns>stream from which processed data with the specified target encoding may be read
 /// 	</returns>
 /// <exception cref="System.ArgumentException">
 /// if the format combination supplied is
 /// not supported.
 /// </exception>
 public abstract AudioInputStream getAudioInputStream(AudioFormat.Encoding
     targetEncoding, AudioInputStream sourceStream);
 /// <summary>
 /// Obtains an audio input stream with the specified format from the given audio
 /// input stream.
 /// </summary>
 /// <remarks>
 /// Obtains an audio input stream with the specified format from the given audio
 /// input stream.
 /// </remarks>
 /// <param name="targetFormat">desired data format of the stream after processing</param>
 /// <param name="sourceStream">stream from which data to be processed should be read</param>
 /// <returns>stream from which processed data with the specified format may be read</returns>
 /// <exception cref="System.ArgumentException">
 /// if the format combination supplied is
 /// not supported.
 /// </exception>
 public abstract AudioInputStream getAudioInputStream(AudioFormat
     targetFormat, AudioInputStream sourceStream);