/// <summary>
/// Parses the header of a WAV (RIFF) stream and returns an audio input stream
/// positioned at the first byte of the raw sample data.
/// </summary>
/// <param name="reader">Binary reader positioned at the very start of the WAV data.</param>
/// <returns>
/// A <c>BinaryAudioStreamReader</c> built from the parsed format description and
/// the supplied reader.
/// </returns>
/// <exception cref="global::System.FormatException">
/// Thrown when the "RIFF", "WAVE", "fmt " or "data" tag is not found where expected.
/// </exception>
public static AudioInputStream OpenWaveFile(BinaryReader reader)
{
    AudioInputStreamFormat format = new AudioInputStreamFormat();

    // Tag "RIFF"
    char[] data = new char[4];
    reader.Read(data, 0, 4);
    if (data[0] != 'R' || data[1] != 'I' || data[2] != 'F' || data[3] != 'F')
    {
        throw new global::System.FormatException("Wrong wav header");
    }

    // Chunk size: not needed here, but the four bytes must be consumed.
    reader.ReadInt32();

    // Subchunk, Wave Header
    // Tag: "WAVE"
    reader.Read(data, 0, 4);
    if (data[0] != 'W' || data[1] != 'A' || data[2] != 'V' || data[3] != 'E')
    {
        throw new global::System.FormatException("Wrong wav tag in wav header");
    }

    // Subchunk, Format
    // Tag: "fmt " (with trailing space). All four characters must match; any
    // single mismatch rejects the header.
    reader.Read(data, 0, 4);
    if (data[0] != 'f' || data[1] != 'm' || data[2] != 't' || data[3] != ' ')
    {
        throw new global::System.FormatException("Wrong format tag in wav header");
    }

    // Format chunk size
    int formatSize = reader.ReadInt32();
    format.FormatTag = reader.ReadUInt16();
    format.Channels = reader.ReadUInt16();
    format.SamplesPerSec = (int)reader.ReadUInt32();
    format.AvgBytesPerSec = (int)reader.ReadUInt32();
    format.BlockAlign = reader.ReadUInt16();
    format.BitsPerSample = reader.ReadUInt16();

    // 16 bytes of the format chunk have been read so far; anything beyond that
    // (cbSize and extension data) is skipped.
    if (formatSize > 16)
    {
        reader.ReadBytes(formatSize - 16);
    }

    // Second chunk, data
    // Tag: "data"
    reader.Read(data, 0, 4);
    if (data[0] != 'd' || data[1] != 'a' || data[2] != 't' || data[3] != 'a')
    {
        throw new global::System.FormatException("Wrong data tag in wav");
    }

    // Data chunk size: not needed here, but the four bytes must be consumed so
    // the reader ends up at the start of the raw sample data.
    reader.ReadInt32();

    // The format is now fully populated and the reader is positioned at the
    // start of the body, i.e. the raw sample data.
    return new BinaryAudioStreamReader(format, reader);
}
/// <summary>
/// Creates a voice audio stream that reports the supplied audio format and is
/// backed by a fresh <c>EchoStream</c> buffer.
/// </summary>
/// <param name="format">
/// Audio format description, taken up front to keep the stream simple.
/// Cognitive Speech services expect PCM WAV, mono, 16 bits per sample,
/// 16k samples/s, 32k bytes/s and a block align of 2.
/// </param>
public VoiceAudioStream(AudioInputStreamFormat format)
{
    this._dataStream = new EchoStream();
    this._format = format;
}
/// <summary>
/// Builds the speech recognizer: sets up the audio format, wraps it in the custom
/// <c>VoiceAudioStream</c>, creates the recognizer from the subscription key and
/// region, and subscribes the error, intermediate-result and final-result handlers.
/// </summary>
private void CreateSpeechClient()
{
    // Describe the audio that will be pushed into the custom stream.
    AudioInputStreamFormat pcmFormat = new AudioInputStreamFormat();
    pcmFormat.BitsPerSample = 16;
    pcmFormat.BlockAlign = 2;
    pcmFormat.AvgBytesPerSec = 32000;
    pcmFormat.Channels = 1;
    pcmFormat.FormatTag = 1;
    pcmFormat.SamplesPerSec = 16000;

    // Custom AudioInputStream that the recognizer reads from.
    _voiceAudioStream = new VoiceAudioStream(pcmFormat);

    var speechFactory = SpeechFactory.FromSubscription(_speechKey, _speechRegion);
    _speechClient = speechFactory.CreateSpeechRecognizerWithStream(_voiceAudioStream, "en-gb");

    // Subscribe the recognition event handlers.
    _speechClient.RecognitionErrorRaised += _speechClient_RecognitionErrorRaised;
    _speechClient.IntermediateResultReceived += _speechClient_IntermediateResultReceived;
    _speechClient.FinalResultReceived += _speechClient_FinalResultReceived;
}