/// <summary>
/// Parses the header of a canonical RIFF/WAVE stream and returns an audio input
/// stream positioned at the first byte of the raw sample data.
/// </summary>
/// <remarks>
/// Only the simple layout is supported: "RIFF", chunk size, "WAVE", a "fmt " chunk,
/// immediately followed by the "data" chunk. Any other chunk between them is
/// rejected with a <see cref="global::System.FormatException"/>.
/// </remarks>
/// <param name="reader">Reader positioned at the first byte of the wave file.</param>
/// <returns>
/// A <c>BinaryAudioStreamReader</c> built from the parsed format and <paramref name="reader"/>.
/// </returns>
/// <exception cref="global::System.ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <exception cref="global::System.FormatException">
/// One of the "RIFF", "WAVE", "fmt " or "data" tags is missing, truncated or different.
/// </exception>
public static AudioInputStream OpenWaveFile(BinaryReader reader)
        {
            if (reader == null)
            {
                throw new global::System.ArgumentNullException(nameof(reader));
            }

            char[] tag = new char[4];

            // RIFF container header: "RIFF", 32-bit chunk size, "WAVE".
            ReadExpectedTag(reader, tag, "RIFF", "Wrong wav header");

            // The overall chunk size is not needed, but must be consumed to advance the reader.
            reader.ReadInt32();

            ReadExpectedTag(reader, tag, "WAVE", "Wrong wav tag in wav header");

            // Format sub-chunk: "fmt " (note the trailing space), 32-bit size, then the format fields.
            ReadExpectedTag(reader, tag, "fmt ", "Wrong format tag in wav header");
            long formatSize = reader.ReadInt32();

            AudioInputStreamFormat format = new AudioInputStreamFormat();
            format.FormatTag      = reader.ReadUInt16();
            format.Channels       = reader.ReadUInt16();
            format.SamplesPerSec  = (int)reader.ReadUInt32();
            format.AvgBytesPerSec = (int)reader.ReadUInt32();
            format.BlockAlign     = reader.ReadUInt16();
            format.BitsPerSample  = reader.ReadUInt16();

            // The six fields above occupy 16 bytes; anything beyond that (cbSize and
            // extension data) is skipped so the reader lands on the next chunk.
            if (formatSize > 16)
            {
                reader.ReadBytes((int)(formatSize - 16));
            }

            // Data sub-chunk: "data", 32-bit size, then the samples.
            ReadExpectedTag(reader, tag, "data", "Wrong data tag in wav");

            // The data chunk size is not used, but consuming it leaves the reader at
            // the start of the body, i.e. the raw sample data.
            reader.ReadInt32();

            return new BinaryAudioStreamReader(format, reader);
        }

        /// <summary>
        /// Reads <c>expected.Length</c> characters from <paramref name="reader"/> into
        /// <paramref name="buffer"/> and verifies that they spell <paramref name="expected"/>.
        /// </summary>
        /// <param name="reader">Reader to consume the tag from.</param>
        /// <param name="buffer">Scratch buffer, at least <c>expected.Length</c> characters long.</param>
        /// <param name="expected">Tag text that must be present at the current position.</param>
        /// <param name="errorMessage">Message of the exception thrown on mismatch.</param>
        /// <exception cref="global::System.FormatException">
        /// Fewer characters than expected could be read, or the characters differ.
        /// </exception>
        private static void ReadExpectedTag(BinaryReader reader, char[] buffer, string expected, string errorMessage)
        {
            // A short read means the stream ended inside the tag; without this check the
            // comparison below would run against whatever the buffer held before.
            int charsRead = reader.Read(buffer, 0, expected.Length);
            bool matches = charsRead == expected.Length;

            for (int i = 0; matches && i < expected.Length; i++)
            {
                matches = buffer[i] == expected[i];
            }

            if (!matches)
            {
                throw new global::System.FormatException(errorMessage);
            }
        }
 /// <summary>
 /// Creates the stream with a caller-supplied audio format and a new <c>EchoStream</c>
 /// as its backing data stream.
 /// </summary>
 /// <param name="format">
 /// Audio format stored for this stream. Taking it in the constructor keeps the rest
 /// of the class simpler. Cognitive Speech services expect PCM WAV, mono,
 /// 16k samples/s, 32k bytes/s, block align 2 and 16 bits per sample.
 /// </param>
 public VoiceAudioStream(AudioInputStreamFormat format)
 {
     // Backing buffer first, then the format the caller handed in.
     this._dataStream = new EchoStream();
     this._format     = format;
 }
Beispiel #3
0
        /// <summary>
        /// Builds the audio format, wraps it in a <c>VoiceAudioStream</c>, creates a
        /// stream-based speech recognizer for "en-gb" and subscribes the error,
        /// intermediate-result and final-result handlers.
        /// </summary>
        private void CreateSpeechClient()
        {
            // 16-bit, single-channel audio at 16000 samples/s (32000 bytes/s, block align 2),
            // with format tag 1.
            var pcmFormat = new AudioInputStreamFormat();
            pcmFormat.BitsPerSample  = 16;
            pcmFormat.BlockAlign     = 2;
            pcmFormat.AvgBytesPerSec = 32000;
            pcmFormat.Channels       = 1;
            pcmFormat.FormatTag      = 1;
            pcmFormat.SamplesPerSec  = 16000;

            // Custom AudioInputStream that the recognizer will pull audio from.
            _voiceAudioStream = new VoiceAudioStream(pcmFormat);

            _speechClient = SpeechFactory
                .FromSubscription(_speechKey, _speechRegion)
                .CreateSpeechRecognizerWithStream(_voiceAudioStream, "en-gb");

            // Hook up the recognizer callbacks.
            _speechClient.RecognitionErrorRaised     += _speechClient_RecognitionErrorRaised;
            _speechClient.IntermediateResultReceived += _speechClient_IntermediateResultReceived;
            _speechClient.FinalResultReceived        += _speechClient_FinalResultReceived;
        }