private void SetupTranscriptionAndTranslationService()
{
    try
    {
        var lCognitiveKey = _settings.AzureCognitiveKey;
        var lCognitiveRegion = _settings.AzureCognitiveRegion;
        // Log only the first half of the key so the full secret never lands in the event stream.
        _eventPublisher.Publish("MySTT Setup", $"Got region: {lCognitiveRegion}, key starting from: {lCognitiveKey?.Substring(0, lCognitiveKey.Length / 2)}");
        this.mTransSpeechConfig = SpeechTranslationConfig.FromSubscription(lCognitiveKey, lCognitiveRegion);
        var fromLanguage = "en-US";
        var toLanguages = new List<string> { "el-GR" };
        //var toLanguages = new List<string> { "ru-RU" };
        this.mTransSpeechConfig.SpeechRecognitionLanguage = fromLanguage;
        toLanguages.ForEach(this.mTransSpeechConfig.AddTargetLanguage);
        this.mInputStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(SAMPLESPERSECOND, BITSPERSAMPLE, NUMBEROFCHANNELS));
        this.mAudioConfig = AudioConfig.FromStreamInput(this.mInputStream);
        this.mTranslationRecognizer = new TranslationRecognizer(this.mTransSpeechConfig, this.mAudioConfig);
        this.mTranslationRecognizer.Recognizing += this.MSpeechRecognizer_Recognizing;
        this.mTranslationRecognizer.Recognized += this.MSpeechRecognizer_Recognized;
        this.mTranslationRecognizer.SpeechEndDetected += this.MSpeechRecognizer_SpeechEndDetected;
        this.StartRecognisionIfNeeded();
    }
    catch (Exception ex)
    {
        _eventPublisher.Publish("MySTT Setup - Failed", $"Failed to initialize: {ex.Message}");
    }
}
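The setup above wires the recognizer to a push stream but never shows audio being fed into it. A minimal sketch of that missing piece, assuming some capture source delivers raw PCM buffers matching the format constants; `OnAudioCaptured` and `OnCaptureStopped` are illustrative names, not part of the original code:

// Hypothetical capture callback; 'buffer' is assumed to hold raw PCM in the
// SAMPLESPERSECOND / BITSPERSAMPLE / NUMBEROFCHANNELS format declared above.
private void OnAudioCaptured(byte[] buffer, int bytesRecorded)
{
    // The recognizer drains the push stream on its own worker thread.
    this.mInputStream.Write(buffer, bytesRecorded);
}

// Closing the stream signals end-of-audio so the recognizer can raise its
// final Recognized/SpeechEndDetected events.
private void OnCaptureStopped() => this.mInputStream.Close();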
//private const string speechEndpoint = "https://YOUR_LOCATION.api.cognitive.microsoft.com/";

//public async Task<IActionResult> OnGetAsync()
//{
//    return Page();
//}

public async Task<IActionResult> OnPostAsync()
{
    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechLocation);
    speechConfig.SpeechRecognitionLanguage = "ja-JP";

    byte[] readBytes;
    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var reader = new BinaryReader(VoiceFile.OpenReadStream());
    // Copy the uploaded file into the push stream in 1 KB chunks.
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);

    var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
    var result = await speechRecognizer.RecognizeOnceAsync();
    if (result.Reason == ResultReason.RecognizedSpeech)
    {
        Result = "結果:"; // "Result:"
        RecognizedText = result.Text;
    }
    return Page();
}
public async Task<string> DetectLanguage(byte[] audioBytes, string fileExtension, string locale1, string locale2)
{
    var wavBytes = ConvertToWaveBytes(audioBytes, fileExtension);
    var autoDetectSourceLanguageConfig = AutoDetectSourceLanguageConfig.FromLanguages(new[] { locale1, locale2 });
    var config = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
    var stopRecognition = new TaskCompletionSource<int>();
    var detected = new List<string>();
    using var pushStream = AudioInputStream.CreatePushStream();
    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    {
        using var recognizer = new SpeechRecognizer(config, autoDetectSourceLanguageConfig, audioInput);
        pushStream.Write(wavBytes);
        pushStream.Close();
        recognizer.Recognized += (s, e) =>
        {
            var autoDetectSourceLanguageResult = AutoDetectSourceLanguageResult.FromResult(e.Result);
            detected.Add(autoDetectSourceLanguageResult.Language);
            if (detected.Count > UtteranceCount)
            {
                stopRecognition.TrySetResult(0);
            }
        };
        recognizer.SessionStopped += (s, e) => { stopRecognition.TrySetResult(0); };
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
        // Run the timeout watchdog in the background; it completes stopRecognition if nothing arrives in time.
        _ = Task.Run(() => SetTimeOutForRecognition(stopRecognition));
        await stopRecognition.Task.ConfigureAwait(false);
        // Stop the continuous recognition session we started above.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
    }
    if (detected.Count == 0)
    {
        throw new TimeoutException("Did not get any language identification results back in time.");
    }
    // Pick the most frequently detected language across utterances.
    var mostFreq = detected.GroupBy(i => i).OrderByDescending(g => g.Count()).First().Key;
    if (string.IsNullOrEmpty(mostFreq) ||
        (!mostFreq.Equals(locale1, StringComparison.OrdinalIgnoreCase) &&
         !mostFreq.Equals(locale2, StringComparison.OrdinalIgnoreCase)))
    {
        return locale1;
    }
    return mostFreq;
}
private static async Task<string> UploadAudioAndStartRemoteTranscription(string key, string region)
{
    AudioStreamFormat audioStreamFormat;
    var config = SpeechConfig.FromSubscription(key, region);
    config.SetProperty("ConversationTranscriptionInRoomAndOnline", "true");
    // Request both real-time results and an asynchronous (remote) transcription.
    config.SetServiceProperty("transcriptionMode", "RealTimeAndAsync", ServicePropertyChannel.UriQueryParameter);
    var waveFilePullStream = OpenWavFile(@"katiesteve.wav", out audioStreamFormat);
    var audioInput = AudioConfig.FromStreamInput(AudioInputStream.CreatePullStream(waveFilePullStream, audioStreamFormat));
    var meetingId = Guid.NewGuid().ToString();
    using (var conversation = await Conversation.CreateConversationAsync(config, meetingId))
    using (var conversationTranscriber = TrackSessionId(new ConversationTranscriber(audioInput)))
    {
        await conversationTranscriber.JoinConversationAsync(conversation);
        await conversation.AddParticipantAsync("OneUserByUserId");
        var user = User.FromUserId("CreateUserFromId and then add it");
        await conversation.AddParticipantAsync(user);
        var result = await GetRecognizerResult(conversationTranscriber, meetingId);
    }
    return meetingId;
}
/// <summary>
/// Returns speech-to-text results for the selected Opus audio file, streamed from a blob container in Azure Storage.
/// </summary>
/// <param name="opusBlob">Name of the Opus file.</param>
/// <param name="container">Azure blob container name.</param>
/// <returns>List of <see cref="Speech"/> recognition results.</returns>
public async Task<List<Speech>> RunRecognitionAsync(string opusBlob, string container)
{
    SpeechResult = new List<Speech>();
    var blobService = new BlobService();
    var blobClient = await blobService.GetBlobFromContainerAsync(opusBlob, container);
    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using (var recognizer = new SpeechRecognizer(_speechConfig, _languagesToDetect, audioConfig))
    {
        recognizer.Recognizing += Recognizing;
        recognizer.Recognized += Recognized;
        recognizer.SessionStarted += SessionStarted;
        recognizer.SessionStopped += SessionStopped;
        recognizer.Canceled += SessionCanceled;
        await InjectStreamIntoRecognizerAsync(audioInputStream, blobClient);
        await recognizer.StartContinuousRecognitionAsync();
        await _stopRecognition.Task;
        await recognizer.StopContinuousRecognitionAsync();
    }
    return SpeechResult;
}
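`InjectStreamIntoRecognizerAsync` is referenced above but not shown. A plausible sketch, assuming the blob client is an Azure.Storage.Blobs `BlobClient` and the blob already holds audio in a format the service accepts; the method body is an assumption, not the original implementation:

// Sketch only: download the blob and push its bytes into the recognizer's stream.
private static async Task InjectStreamIntoRecognizerAsync(PushAudioInputStream audioInputStream, BlobClient blobClient)
{
    using var blobStream = await blobClient.OpenReadAsync();
    var buffer = new byte[4096];
    int read;
    while ((read = await blobStream.ReadAsync(buffer, 0, buffer.Length)) > 0)
    {
        audioInputStream.Write(buffer, read);
    }
    // Closing signals end-of-stream; SessionStopped fires once the service drains the audio.
    audioInputStream.Close();
}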
/// <summary>
/// Remote audio transcription of the given audioFile with CognitiveServices.
/// </summary>
public static AnalysisResult TranscribeAudio(ref AnalysisResult audioResponse, IFormFile audioFile)
{
    // Needed for speaker diarization to resolve at the word level.
    SPEECH_CONFIG.RequestWordLevelTimestamps();

    var audioFormat128 = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);   // 8 kHz x 16 bit = 128 kbit/s
    var audioFormat256 = AudioStreamFormat.GetWaveFormatPCM(16000, 16, 1);  // 16 kHz x 16 bit = 256 kbit/s (unused here)

    // Load byte stream -> audio stream, build an audio config from it, then initialize the recognizer.
    using (var br = new BinaryReader(audioFile.OpenReadStream()))
    using (var audioInputStream = AudioInputStream.CreatePushStream(audioFormat128))
    using (var audioConfig = AudioConfig.FromStreamInput(audioInputStream))
    using (var recognizer = new SpeechRecognizer(SPEECH_CONFIG, audioConfig))
    {
        long nbytes = audioFile.Length;
        var buff = new List<byte>();

        // Read through the audio bytes in 1 KB chunks, pushing each chunk into the stream.
        byte[] readBytes;
        do
        {
            readBytes = br.ReadBytes(1024);
            buff.AddRange(readBytes);
            audioInputStream.Write(readBytes, readBytes.Length);
        } while (readBytes.Length > 0);

        var transcript = ExecuteRecognizer(recognizer).Result;
        audioResponse.Transcript = transcript;
        return audioResponse;
    }
}
private async Task<PushAudioInputStream> CreatePushStreamAsync(Stream stream)
{
    var read = 0;
    var recognitionStream = AudioInputStream.CreatePushStream();
    var buffer = ArrayPool<byte>.Shared.Rent(80000);
    var sumRead = 0;
    try
    {
        while ((read = await stream.ReadAsync(buffer, 0, buffer.Length)) != 0)
        {
            sumRead += read;
            recognitionStream.Write(buffer, read);
        }
        recognitionStream.Close();
        if (sumRead == 0)
        {
            return null;
        }
        return recognitionStream;
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(buffer);
    }
}
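A minimal caller for the helper above might look like this; the file name and the `speechConfig` variable are illustrative assumptions:

// Sketch: feed a WAV file through CreatePushStreamAsync and run one-shot recognition.
using var file = File.OpenRead("sample.wav"); // hypothetical input file
var pushStream = await CreatePushStreamAsync(file);
if (pushStream != null)
{
    using var audioConfig = AudioConfig.FromStreamInput(pushStream);
    using var recognizer = new SpeechRecognizer(speechConfig, audioConfig); // speechConfig assumed to exist
    var result = await recognizer.RecognizeOnceAsync();
    Console.WriteLine(result.Text);
}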
protected internal virtual void writeFile(string wavName)
{
    AudioFormat audioFormat = new AudioFormat((float)this.sampleRate, this.bitsPerSample, 1, this.isSigned, true);
    AudioFileFormat.Type targetType = WavWriter.getTargetType("wav");
    byte[] array = this.baos.toByteArray();
    InputStream inputStream = new ByteArrayInputStream(array);
    int frameSize = audioFormat.getFrameSize();
    // Frame length is bytes / frameSize when the frame size is known.
    AudioInputStream audioInputStream = new AudioInputStream(inputStream, audioFormat, (long)((frameSize != -1) ? (array.Length / frameSize) : (-(long)array.Length)));
    File file = new File(wavName);
    if (AudioSystem.isFileTypeSupported(targetType, audioInputStream))
    {
        try
        {
            AudioSystem.write(audioInputStream, targetType, file);
        }
        catch (IOException ex)
        {
            Throwable.instancehelper_printStackTrace(ex);
        }
    }
}
public AudioData(AudioInputStream ais)
{
    this.__listeners = new ArrayList();
    this.selectionStart = -1;
    this.selectionEnd = -1;
    this.shorts = Utils.toSignedPCM(ais);
    this.format = new AudioFormat(ais.getFormat().getSampleRate(), 16, 1, true, false);
}
public virtual void save(string fileName, AudioFileFormat.Type fileFormat)
{
    File file = new File(fileName);
    byte[] audio = this.getAudio();
    AudioInputStream audioInputStream = new AudioInputStream(new ByteArrayInputStream(audio), this.getAudioFormat(), (long)audio.Length);
    AudioSystem.write(audioInputStream, fileFormat, file);
}
/// <summary>
/// Initializes a new instance of the <see cref="ContinuousSpeechRecognizer"/> class.
/// </summary>
/// <param name="pipeline">The pipeline in which to create the component.</param>
/// <param name="subscriptionKey">The subscription key for the Azure speech resource.</param>
/// <param name="region">The service region of the Azure speech resource.</param>
public ContinuousSpeechRecognizer(Pipeline pipeline, string subscriptionKey, string region)
    : base(pipeline)
{
    var config = SpeechConfig.FromSubscription(subscriptionKey, region);
    this.pushStream = AudioInputStream.CreatePushStream();
    this.audioInput = AudioConfig.FromStreamInput(this.pushStream);
    this.recognizer = new SpeechRecognizer(config, this.audioInput);
}
public AudioInputStream nextElement()
{
    AudioInputStream result = null;
    if (this.lastFile == null)
    {
        ConcatAudioFileDataSource.access_002(this.this_0, this.readNext());
    }
    if (ConcatAudioFileDataSource.access_000(this.this_0) != null)
    {
        try
        {
            try
            {
                AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(ConcatAudioFileDataSource.access_000(this.this_0));
                AudioFormat format = audioInputStream.getFormat();
                // Latch the stream parameters from the first file; all later files must match.
                if (!ConcatAudioFileDataSource.access_100(this.this_0))
                {
                    ConcatAudioFileDataSource.access_102(this.this_0, true);
                    this.this_0.bigEndian = format.isBigEndian();
                    this.this_0.sampleRate = ByteCodeHelper.f2i(format.getSampleRate());
                    this.this_0.signedData = format.getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED);
                    this.this_0.bytesPerValue = format.getSampleSizeInBits() / 8;
                }
                if (format.getSampleRate() != (float)this.this_0.sampleRate || format.getChannels() != 1 || format.isBigEndian() != this.this_0.bigEndian)
                {
                    throw new RuntimeException("format mismatch for subsequent files");
                }
                result = audioInputStream;
                ConcatAudioFileDataSource.access_200(this.this_0).finer(new StringBuilder().append("Starting processing of '").append(this.lastFile.getFile()).append('\'').toString());
                // Notify listeners that processing of this file has started.
                Iterator iterator = this.this_0.__fileListeners.iterator();
                while (iterator.hasNext())
                {
                    AudioFileProcessListener audioFileProcessListener = (AudioFileProcessListener)iterator.next();
                    audioFileProcessListener.audioFileProcStarted(new File(ConcatAudioFileDataSource.access_000(this.this_0).getFile()));
                }
                this.lastFile = ConcatAudioFileDataSource.access_000(this.this_0);
                ConcatAudioFileDataSource.access_002(this.this_0, null);
            }
            catch (IOException ex)
            {
                Throwable.instancehelper_printStackTrace(ex);
                throw new Error(new StringBuilder().append("Cannot convert ").append(ConcatAudioFileDataSource.access_000(this.this_0)).append(" to a FileInputStream").toString());
            }
        }
        catch (UnsupportedAudioFileException ex3)
        {
            Throwable.instancehelper_printStackTrace(ex3);
        }
    }
    return result;
}
public AudioSegment(byte[] audioData, long startOffset, long endOffset, uint sampleRate = SAMPLE_RATE, byte bitsPerSample = BITS_PER_SAMPLE, byte channels = CHANNELS)
{
    MemoryStream tempStream = new MemoryStream(audioData);
    AudioStreamFormat streamFormat = AudioStreamFormat.GetWaveFormatPCM(sampleRate, bitsPerSample, channels);
    AudioStream = AudioInputStream.CreatePullStream(new BinaryAudioStreamReader(tempStream), streamFormat);
    AudioData = audioData;
    StartOffset = startOffset;
    EndOffset = endOffset;
}
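`BinaryAudioStreamReader` is a user-defined pull-stream callback, not part of the Speech SDK. A minimal sketch of such a class, assuming it simply drains the wrapped stream; the body is an assumption based on how the constructor above uses it:

// Sketch of a PullAudioInputStreamCallback over a .NET Stream.
public sealed class BinaryAudioStreamReader : PullAudioInputStreamCallback
{
    private readonly BinaryReader _reader;

    public BinaryAudioStreamReader(Stream source) => _reader = new BinaryReader(source);

    // The SDK calls Read to pull up to 'size' bytes; returning 0 signals end-of-stream.
    public override int Read(byte[] dataBuffer, uint size)
        => _reader.Read(dataBuffer, 0, (int)size);

    public override void Close()
    {
        _reader.Dispose();
        base.Close();
    }
}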
void DisconnectAudioStream()
{
    if (_microphoneStream != null)
    {
        // Destroy AudioPreprocessorPlaybackListener
        if (_audioPreprocessorPlaybackListener != null)
        {
            Destroy(_audioPreprocessorPlaybackListener);
            _audioPreprocessorPlaybackListener = null;
        }
        // Dispose of audio preprocessor
        if (_audioPreprocessor != null)
        {
            _audioPreprocessor.Dispose();
            _audioPreprocessor = null;
        }
        // Close microphone stream
        _microphoneStream.Close();
        // Dispose microphone device
        if (_oculusMicrophoneDevice != null)
        {
            _oculusMicrophoneDevice.Stop();
            _oculusMicrophoneDevice.Dispose();
            _oculusMicrophoneDevice = null;
        }
        if (_nativeMicrophoneDevice != null)
        {
            _nativeMicrophoneDevice.Stop();
            _nativeMicrophoneDevice.Dispose();
            _nativeMicrophoneDevice = null;
        }
        if (_unityMicrophoneDevice != null)
        {
            _unityMicrophoneDevice.Dispose();
            _unityMicrophoneDevice = null;
        }
        // Clean up
        _unityMicrophoneDeviceDataReader = null;
        _microphoneStream = null;
    }
    // Remove audio output
    if (_audioOutput != null)
    {
        _audioOutput.Stop();
        Destroy(_audioOutput);
        _audioOutput = null;
    }
}
/// <summary>
/// Constructs an <see cref="AudioConfig"/> from <see cref="Config"/>.
/// Depending on the available services, this may either use the audio features built into the Speech SDK (such as <see cref="AudioConfig.FromDefaultMicrophoneInput"/>),
/// or it may construct an <see cref="IStreamAudioSource"/> that accesses the requested <see cref="AudioDevice"/> with resampling and noise gates as required.
/// </summary>
/// <returns></returns>
protected AudioConfig GetAudioConfig()
{
    var streamSource = GetStreamAudioSource(Config.AudioSource);
    if (streamSource != null)
    {
        // Use this stream source and convert to an Azure audio stream.
        try
        {
            var azureInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(
                (uint)streamSource.Format.SampleRate,
                (byte)streamSource.Format.BitsPerSample,
                (byte)streamSource.Format.ChannelCount));
            byte[] bufferOptional = null;
            streamSource.DataAvailable += (s, e) =>
            {
                azureInput.Write(e.Buffer.GetArray(ref bufferOptional), e.Buffer.Count);
            };
            streamSource.Stopped += (s, e) =>
            {
                if (e.Cause == StreamAudioSourceStoppedCause.Stopped)
                {
                    // Signal end-of-stream to Azure.
                    azureInput.Close();
                }
            };
            this.StreamAudioSource = streamSource;
            return AudioConfig.FromStreamInput(azureInput);
        }
        catch (Exception ex)
        {
            Logger.LogError(ex, $"Error while creating an Azure AudioConfig from an IStreamAudioSource. Format: SampleRate={streamSource.Format.SampleRate}, BitsPerSample={streamSource.Format.BitsPerSample}, Channels={streamSource.Format.ChannelCount}");
            streamSource.Dispose();
        }
    }
    this.StreamAudioSource = null;
    this.StreamAudioNoiseGate = null;
    // Try to use the built-in audio engine.
    if (Config.AudioSource is AudioDevice audioDevice)
    {
        if (audioDevice.UseDefaultAudioInputDevice)
        {
            return AudioConfig.FromDefaultMicrophoneInput();
        }
    }
    return null;
}
void Start()
{
    if (outputText == null)
    {
        UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
    }
    else if (recoButton == null)
    {
        message = "recoButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
    }
    else
    {
        // Continue with normal initialization; Text and Button objects are present.
#if PLATFORM_ANDROID
        // Request permission to use the microphone, cf.
        // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
        message = "Waiting for mic permission";
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#elif PLATFORM_IOS
        if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            Application.RequestUserAuthorization(UserAuthorization.Microphone);
        }
#else
        micPermissionGranted = true;
        message = "Click button to recognize speech";
#endif
        grabacionCompleta = new StringBuilder(200);
        config = SpeechConfig.FromSubscription("b899f4a3bc2b4b30b3e690476b1af952", "westus");
        config.SpeechRecognitionLanguage = "es-ES";
        pushStream = AudioInputStream.CreatePushStream();
        audioInput = AudioConfig.FromStreamInput(pushStream);
        recognizer = new SpeechRecognizer(config, audioInput);
        recognizer.Recognizing += RecognizingHandler;
        recognizer.Recognized += RecognizedHandler;
        recognizer.Canceled += CanceledHandler;
        recoButton.onClick.AddListener(ButtonClick);
        foreach (var device in Microphone.devices)
        {
            Debug.Log("DeviceName: " + device);
        }
        audioSource = GameObject.Find("MyAudioSource").GetComponent<AudioSource>();
    }
}
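The Unity setup above creates `pushStream` but does not show how microphone samples reach it. A sketch of a pump coroutine under the assumption of a 16 kHz mono capture; the method name and buffer sizes are illustrative:

// Sketch: pump Unity microphone samples into the Speech SDK push stream.
private IEnumerator PumpMicrophone()
{
    const int sampleRate = 16000;
    AudioClip clip = Microphone.Start(null, true, 1, sampleRate); // default device, 1 s looping buffer
    int lastPos = 0;
    var floatBuf = new float[clip.samples];
    while (Microphone.IsRecording(null))
    {
        int pos = Microphone.GetPosition(null);
        if (pos != lastPos)
        {
            int count = (pos - lastPos + clip.samples) % clip.samples;
            clip.GetData(floatBuf, lastPos); // reads clip.samples values starting at lastPos, wrapping
            // Convert float [-1,1] samples to 16-bit little-endian PCM.
            var pcm = new byte[count * 2];
            for (int i = 0; i < count; i++)
            {
                short s = (short)(Mathf.Clamp(floatBuf[i], -1f, 1f) * short.MaxValue);
                pcm[2 * i] = (byte)(s & 0xff);
                pcm[2 * i + 1] = (byte)((s >> 8) & 0xff);
            }
            pushStream.Write(pcm, pcm.Length);
            lastPos = pos;
        }
        yield return null;
    }
}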
private static void dumpStreamChunk(File file, string text, long num, long num2)
{
    AudioFileFormat audioFileFormat = AudioSystem.getAudioFileFormat(file);
    AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
    AudioFormat format = audioFileFormat.getFormat();
    // Bytes per millisecond of audio: frame size (bytes/frame) * frame rate (frames/s) / 1000.
    int num3 = Math.round((float)format.getFrameSize() * format.getFrameRate() / 1000f);
    audioInputStream.skip(num * (long)num3);
    AudioInputStream audioInputStream2 = new AudioInputStream(audioInputStream, format, num2 * (long)num3);
    AudioSystem.write(audioInputStream2, audioFileFormat.getType(), new File(text));
    audioInputStream.close();
    audioInputStream2.close();
}
void ConfigureSpeechRecognizer()
{
    _speechConfig = SpeechConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
    _speechConfig.SpeechRecognitionLanguage = "es-US";
    _speechConfig.OutputFormat = OutputFormat.Detailed;

    _pushStream = AudioInputStream.CreatePushStream();
    _audioInput = AudioConfig.FromStreamInput(_pushStream);
    _speechRecognizer = new SpeechRecognizer(_speechConfig, _audioInput);
    _speechRecognizer.Recognizing += SpeechRecognizingHandler;
    _speechRecognizer.Recognized += SpeechRecognizedHandler;
    _speechRecognizer.Canceled += SpeechCanceledHandler;

    _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
    _audioSource.loop = false;
    _audioSource.playOnAwake = false;
}
void Start()
{
    if (outputText == null)
    {
        UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
    }
    else if (recoButton == null)
    {
        _message = "recoButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(_message);
    }
    else
    {
        // Continue with normal initialization; Text and Button objects are present.
#if PLATFORM_ANDROID
        // Request permission to use the microphone, cf.
        // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
        _message = "Waiting for mic permission";
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#elif PLATFORM_IOS
        if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
        {
            Application.RequestUserAuthorization(UserAuthorization.Microphone);
        }
#else
        _micPermissionGranted = true;
        _message = "Click button to recognize speech";
#endif
        _config = SpeechTranslationConfig.FromSubscription(SubscriptionKey, SubscriptionRegion);
        _config.SpeechRecognitionLanguage = "es-US";
        _config.AddTargetLanguage("en-US");
        _pushStream = AudioInputStream.CreatePushStream();
        _audioInput = AudioConfig.FromStreamInput(_pushStream);
        _recognizer = new TranslationRecognizer(_config, _audioInput);
        _recognizer.Recognizing += RecognizingHandler;
        _recognizer.Recognized += RecognizedHandler;
        _recognizer.Canceled += CanceledHandler;
        foreach (var device in Microphone.devices)
        {
            Debug.Log("DeviceName: " + device);
        }
        _audioSource = GameObject.Find("AudioSource").GetComponent<AudioSource>();
    }
}
public async UniTask STTBytes(byte[] readBytes, int sampleRate, int bitRate, int channels)
{
    var speechConfig = SpeechConfig.FromSubscription(subscription_key, region);
    speechConfig.SpeechRecognitionLanguage = location;
    // Note: 'bitRate' is used here as bits per sample.
    var audioStreamFormat = AudioStreamFormat.GetWaveFormatPCM((uint)sampleRate, (byte)bitRate, (byte)channels);
    var audioInputStream = AudioInputStream.CreatePushStream(audioStreamFormat);
    var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
    audioInputStream.Write(readBytes, readBytes.Length);
    audioInputStream.Close(); // signal end-of-stream so RecognizeOnceAsync can complete
    var result = await recognizer.RecognizeOnceAsync();
    Debug.Log($"Recognized Line : = {result.Text}");
}
public async Task Start()
{
    var config = SpeechConfig.FromSubscription(_projectSettings.AzureSpeechServiceSubscriptionKey, _projectSettings.AzureSpeechServiceRegionName);
    // 8 kHz, 16-bit, mono PCM (typical telephony audio).
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);
    _inputStream = AudioInputStream.CreatePushStream(audioFormat);
    _audioInput = AudioConfig.FromStreamInput(_inputStream);
    _recognizer = new SpeechRecognizer(config, _audioInput);
    _recognizer.SessionStarted += RecognizerStarted;
    _recognizer.Recognized += RecognizerRecognized;
    _recognizer.Canceled += RecognizerCancelled;
    await _recognizer.StartContinuousRecognitionAsync();
}
private void Init(string from, string to)
{
    this.toLanguage = to;
    Profile = MediaEncodingProfile.CreateWav(AudioEncodingQuality.Low);
    Profile.Audio = AudioEncodingProperties.CreatePcm(16000, 1, 16);

    byte channels = 1;
    byte bitsPerSample = 16;
    uint samplesPerSecond = 16000; // or 8000
    var audioFormat = AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels);

    // Init push stream
    pushStream = AudioInputStream.CreatePushStream(audioFormat);

    if (from == to)
    {
        // Same source and target language: plain speech recognition, no translation.
        var config = SpeechConfig.FromSubscription(apiKey, region);
        config.SpeechRecognitionLanguage = from;
        speechRecognizer = new SpeechRecognizer(config, AudioConfig.FromStreamInput(pushStream));
        speechRecognizer.Recognizing += RecognisingSpeechHandler;
        speechRecognizer.Recognized += RecognisingSpeechHandler;
        speechRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
        speechRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();
    }
    else
    {
        var config = SpeechTranslationConfig.FromSubscription(apiKey, region);
        config.SpeechRecognitionLanguage = from;
        config.AddTargetLanguage(to);
        translationRecognizer = new TranslationRecognizer(config, AudioConfig.FromStreamInput(pushStream));
        translationRecognizer.SessionStarted += (sender, args) => this.RecognisionStarted?.Invoke();
        translationRecognizer.SessionStopped += (sender, args) => this.RecognisionStopped?.Invoke();
        translationRecognizer.Recognizing += RecognisingTranslationHandler;
        translationRecognizer.Recognized += RecognisingTranslationHandler;
    }
}
public static short[] toSignedPCM(AudioInputStream ais)
{
    AudioFormat format = ais.getFormat();
    int num = ais.available();
    int frameSize = format.getFrameSize();
    short[] array = new short[(frameSize != -1) ? (num / frameSize) : (-num)];
    byte[] array2 = new byte[format.getFrameSize()];
    int num2 = 0;
    // Read one frame at a time and convert each frame to a signed 16-bit sample.
    while (ais.read(array2) != -1)
    {
        array[num2++] = Utils.bytesToShort(format, array2);
    }
    return array;
}
async static Task FromStream(SpeechConfig speechConfig)
{
    var reader = new BinaryReader(File.OpenRead(DEMO_FILE));
    Console.WriteLine(reader.ToString());
    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var recognizer = new SpeechRecognizer(speechConfig, audioConfig);
    byte[] readBytes;
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);
    var result = await recognizer.RecognizeOnceAsync();
    Console.WriteLine($"RECOGNIZED: Text={result.Text}");
}
public AzureSpeechRecognizer(string key, string region, WaveStream stream)
{
    var speechConfig = SpeechConfig.FromSubscription(key, region);
    this.stream = NormalizeStream(stream);
    this.pushStream = AudioInputStream.CreatePushStream();
    this.recognizer = new SpeechRecognizer(speechConfig, AudioConfig.FromStreamInput(this.pushStream));
    this.resultId = Guid.NewGuid().ToString();
    this.lockObj = new object();

    this.recognizer.Recognized += (snd, evt) =>
    {
        string id = null;
        lock (this.lockObj)
        {
            id = this.resultId;
            this.resultId = Guid.NewGuid().ToString();
        }
        if (!string.IsNullOrWhiteSpace(evt.Result.Text))
        {
            this.SpeechRecognized?.Invoke(this, new RecognitionEventArgs(evt, id));
        }
    };
    this.recognizer.Recognizing += (snd, evt) =>
    {
        string id = null;
        lock (this.lockObj)
        {
            id = this.resultId;
        }
        this.SpeechPredicted?.Invoke(this, new RecognitionEventArgs(evt, id));
    };
    this.recognizer.Canceled += (snd, evt) =>
    {
        Debug.WriteLine("lost recognizer");
    };
}
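`NormalizeStream` is referenced above but not shown. A plausible sketch, assuming NAudio and a 16 kHz / 16-bit / mono target (the push stream's default format); the helper body is an assumption, not the original implementation:

// Sketch: convert an arbitrary WaveStream to 16 kHz, 16-bit, mono PCM.
private static WaveStream NormalizeStream(WaveStream input)
{
    var target = new NAudio.Wave.WaveFormat(16000, 16, 1);
    if (input.WaveFormat.Equals(target))
    {
        return input;
    }
    // ACM-based conversion; a single hop may not cover every rate/channel combination.
    return new NAudio.Wave.WaveFormatConversionStream(target, input);
}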
public static AudioData readAudioFile(string filename)
{
    try
    {
        BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(filename));
        AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(bufferedInputStream);
        AudioData audioData = new AudioData(audioInputStream);
        bufferedInputStream.close();
        return audioData;
    }
    catch (UnsupportedAudioFileException)
    {
        return null;
    }
}
/// <summary>Speech-to-text: recognize from an in-memory stream.</summary>
public static async Task<string> RecognizeFromStreamAsync(string inputFileName)
{
    var config = SpeechConfig.FromSubscription(subscriptionKey, region);
    using var reader = new BinaryReader(File.OpenRead(inputFileName));
    using var audioInputStream = AudioInputStream.CreatePushStream();
    using var audioConfig = AudioConfig.FromStreamInput(audioInputStream);
    using var recognizer = new SpeechRecognizer(config, audioConfig);
    byte[] readBytes;
    do
    {
        readBytes = reader.ReadBytes(1024);
        audioInputStream.Write(readBytes, readBytes.Length);
    } while (readBytes.Length > 0);
    var result = await recognizer.RecognizeOnceAsync();
    return result.Text;
}
public virtual void setInputStream(AudioInputStream inputStream, string streamName)
{
    this.dataStream = inputStream;
    this.streamEndReached = false;
    this.utteranceEndSent = false;
    this.utteranceStarted = false;
    AudioFormat format = inputStream.getFormat();
    this.sampleRate = ByteCodeHelper.f2i(format.getSampleRate());
    this.bigEndian = format.isBigEndian();
    this.logger.finer(new StringBuilder().append("input format is ").append(format.toString()).toString());
    // Bits per sample must be a whole number of bytes.
    if (format.getSampleSizeInBits() % 8 != 0)
    {
        throw new Error("StreamDataSource: bits per sample must be a multiple of 8.");
    }
    this.bytesPerValue = format.getSampleSizeInBits() / 8;
    AudioFormat.Encoding encoding = format.getEncoding();
    if (encoding.equals(AudioFormat.Encoding.PCM_SIGNED))
    {
        this.signedData = true;
    }
    else if (encoding.equals(AudioFormat.Encoding.PCM_UNSIGNED))
    {
        this.signedData = false;
    }
    else
    {
        throw new RuntimeException("used file encoding is not supported");
    }
    this.totalValuesRead = 0L;
}
/// <summary>
/// Creates a recognizer with the baseline model and selected language:
/// creates a config with the subscription key and selected region;
/// if the input source is an audio file, creates the recognizer from it, otherwise from the default mic;
/// then waits on RunRecognition.
/// </summary>
private async Task CreateRecognizer(byte[] channel)
{
    // TODO: support specifying a different region.
    var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);
    config.SpeechRecognitionLanguage = this.RecognitionLanguage;
    config.OutputFormat = OutputFormat.Detailed;
    SpeechRecognizer basicRecognizer;
    PushAudioInputStream pushStream = AudioInputStream.CreatePushStream();
    pushStream.Write(channel);
    pushStream.Close();
    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    {
        using (basicRecognizer = new SpeechRecognizer(config, audioInput))
        {
            await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false);
        }
    }
}
public async Task<string> AudioToTextAsync(byte[] pcm)
{
    var guid = Guid.NewGuid();
    if (!Text.ContainsKey(guid))
    {
        Text[guid] = null;
    }
    // Build out the speech recognizer.
    using (var pushStream = AudioInputStream.CreatePushStream(AudioStreamFormat.GetDefaultInputFormat()))
    using (var audioInput = AudioConfig.FromStreamInput(pushStream))
    using (var recognizer = new SpeechRecognizer(SpeechConfig, audioInput))
    {
        // Subscribe to speech recognizer events.
        recognizer.SessionStarted += OnSpeechRecognitionSessionStarted;
        recognizer.Recognizing += OnSpeechRecognizing;
        recognizer.Recognized += (s, e) => OnSpeechRecognized(s, e, guid);
        recognizer.Canceled += OnSpeechCanceled;
        recognizer.SessionStopped += OnSpeechRecognitionSessionStopped;

        // Start continuous recognition; StopContinuousRecognitionAsync() ends it below.
        await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

        // Send the PCM data to the speech recognizer and signal end-of-stream.
        pushStream.Write(pcm);
        pushStream.Close();

        // Wait for completion. Task.WaitAny keeps the task rooted.
        Task.WaitAny(StopRecognition.Task);

        // Stop recognition.
        await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        return Text[guid];
    }
}
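A caller sketch for the method above; `LoadPcmFromSomewhere` is a hypothetical source of raw bytes, which must match `AudioStreamFormat.GetDefaultInputFormat()` (16 kHz, 16-bit, mono PCM):

// Sketch: transcribe a raw PCM capture end to end.
byte[] pcm = LoadPcmFromSomewhere(); // hypothetical
string transcript = await AudioToTextAsync(pcm);
Console.WriteLine(transcript ?? "(no speech recognized)");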
internal TargetDataLineInputStream(AudioInputStream _enclosing, TargetDataLine line)
    : base()
{
    this._enclosing = _enclosing;
    this.line = line;
}
/// <summary>
/// Indicates whether an audio file of the type specified can be written
/// from the audio input stream indicated.
/// </summary>
/// <param name="fileType">file type for which write capabilities are queried</param>
/// <param name="stream">stream for which file-writing support is queried</param>
/// <returns>
/// <code>true</code> if the file type is supported for this audio input stream,
/// otherwise <code>false</code>
/// </returns>
public virtual bool isFileTypeSupported(AudioFileFormat.Type fileType, AudioInputStream stream)
{
    AudioFileFormat.Type[] types = getAudioFileTypes(stream);
    for (int i = 0; i < types.Length; i++)
    {
        if (fileType.Equals(types[i]))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Obtains the file types that this audio file writer can write from the
/// audio input stream specified.
/// </summary>
/// <param name="stream">the audio input stream for which audio file type support is queried</param>
/// <returns>
/// array of file types. If no file types are supported,
/// an array of length 0 is returned.
/// </returns>
public abstract AudioFileFormat.Type[] getAudioFileTypes(AudioInputStream stream);
/// <summary>
/// Writes a stream of bytes representing an audio file of the file format
/// indicated to the external file provided.
/// </summary>
/// <param name="stream">the audio input stream containing audio data to be written to the file</param>
/// <param name="fileType">file type to be written to the file</param>
/// <param name="out">external file to which the file data should be written</param>
/// <returns>the number of bytes written to the file</returns>
/// <exception cref="System.IO.IOException">if an I/O exception occurs</exception>
/// <exception cref="System.ArgumentException">if the file format is not supported by the system</exception>
/// <seealso cref="isFileTypeSupported(Type)">isFileTypeSupported(Type)</seealso>
/// <seealso cref="getAudioFileTypes()">getAudioFileTypes()</seealso>
public abstract int write(AudioInputStream stream, AudioFileFormat.Type fileType, java.io.File @out);
/// <summary>
/// Obtains an audio input stream with the specified encoding from the given audio
/// input stream.
/// </summary>
/// <param name="targetEncoding">desired encoding of the stream after processing</param>
/// <param name="sourceStream">stream from which data to be processed should be read</param>
/// <returns>stream from which processed data with the specified target encoding may be read</returns>
/// <exception cref="System.ArgumentException">if the format combination supplied is not supported</exception>
public abstract AudioInputStream getAudioInputStream(AudioFormat.Encoding targetEncoding, AudioInputStream sourceStream);
/// <summary>
/// Obtains an audio input stream with the specified format from the given audio
/// input stream.
/// </summary>
/// <param name="targetFormat">desired data format of the stream after processing</param>
/// <param name="sourceStream">stream from which data to be processed should be read</param>
/// <returns>stream from which processed data with the specified format may be read</returns>
/// <exception cref="System.ArgumentException">if the format combination supplied is not supported</exception>
public abstract AudioInputStream getAudioInputStream(AudioFormat targetFormat, AudioInputStream sourceStream);