/// <summary>
/// Initializes a new instance of the <see cref="FFMPEGMediaSource"/> class.
/// </summary>
/// <param name="pipeline">Pipeline this component is a part of</param>
/// <param name="filename">Name of media file to play</param>
/// <param name="format">Output format for images</param>
public FFMPEGMediaSource(Pipeline pipeline, string filename, PixelFormat format = PixelFormat.BGRX_32bpp)
    : base(pipeline)
{
    // Propose the file's creation time as the replay start so originating times line up with wall clock.
    FileInfo mediaFileInfo = new FileInfo(filename);
    pipeline.ProposeReplayTime(
        new TimeInterval(mediaFileInfo.CreationTime, DateTime.MaxValue),
        new TimeInterval(mediaFileInfo.CreationTime, DateTime.MaxValue));
    this.start = mediaFileInfo.CreationTime;
    this.filename = filename;

    // Output emitters for the decoded video frames and audio buffers.
    this.Image = pipeline.CreateEmitter<Shared<Image>>(this, nameof(this.Image));
    this.Audio = pipeline.CreateEmitter<AudioBuffer>(this, nameof(this.Audio));

    // Reader depth follows the requested pixel format: 32bpp for BGRX, 24bpp otherwise.
    int bitsPerPixel = format == PixelFormat.BGRX_32bpp ? 32 : 24;
    this.mpegReader = new FFMPEGReader(bitsPerPixel);
    this.mpegReader.Open(filename, new FFMPEGReaderConfiguration());

    // Audio format is whatever PCM layout the reader reports for this file.
    this.waveFormat = WaveFormat.CreatePcm(
        this.mpegReader.AudioSampleRate,
        this.mpegReader.AudioBitsPerSample,
        this.mpegReader.AudioNumChannels);

    this.outputFormat = format;
    this.audioBufferSize = 0;
    this.audioData = IntPtr.Zero;
    this.audioDataSize = 0;
}
public void WaveFormat_CreatePcm()
{
    // Expected unmanaged WAVEFORMATEX layout for 16-bit stereo PCM at 44.1 kHz.
    byte[] expectedBytes =
    {
        0x01, 0x00,             // FormatTag = 1
        0x02, 0x00,             // Channels = 2
        0x44, 0xac, 0x00, 0x00, // SamplesPerSec = 44100
        0x10, 0xb1, 0x02, 0x00, // AvgBytesPerSec = 176400
        0x04, 0x00,             // BlockAlign = 4
        0x10, 0x00,             // BitsPerSample = 16
        0x00, 0x00,             // ExtraSize = 0
    };

    // Build the equivalent managed WaveFormat and check it marshals to the same bytes.
    WaveFormat actual = WaveFormat.CreatePcm(44100, 16, 2);

    this.MarshalAndVerify(actual, expectedBytes);
}
/// <summary>
/// Called by the ctor to configure the media playback component.
/// Enumerates the source's streams, enabling the first video stream (as RGB24)
/// and the first audio stream (as PCM), and records their formats.
/// </summary>
private void InitializeMediaPipeline()
{
    MediaManager.Startup(false);
    MediaAttributes sourceReaderAttributes = new MediaAttributes();
    sourceReaderAttributes.Set(SourceReaderAttributeKeys.EnableAdvancedVideoProcessing, true);
    this.sourceReader = new SourceReader(this.filename, sourceReaderAttributes);

    // Start with every stream deselected; only the streams we pick below get enabled.
    this.sourceReader.SetStreamSelection(SourceReaderIndex.AllStreams, false);

    int streamIndex = 0;
    bool doneEnumerating = false;
    while (!doneEnumerating)
    {
        try
        {
            MediaType mediaType = this.sourceReader.GetCurrentMediaType(streamIndex);
            var subType = mediaType.Get(MediaTypeAttributeKeys.Subtype);
            DumpMediaType(mediaType);

            if (mediaType.MajorType == MediaTypeGuids.Video && this.imageStreamIndex == -1)
            {
                this.imageStreamIndex = streamIndex;

                // get the image size: the frame-size attribute packs width in the
                // upper 32 bits and height in the lower 32 bits of one long
                long frameSize = mediaType.Get(MediaTypeAttributeKeys.FrameSize);
                this.videoHeight = (short)frameSize;
                this.videoWidth = (short)(frameSize >> 32);

                // enable the stream and set the current media type
                this.sourceReader.SetStreamSelection(this.imageStreamIndex, true);
                mediaType = new MediaType();
                mediaType.Set(MediaTypeAttributeKeys.MajorType, MediaTypeGuids.Video);
                mediaType.Set(MediaTypeAttributeKeys.Subtype, VideoFormatGuids.Rgb24);
                mediaType.Set(MediaTypeAttributeKeys.FrameSize, frameSize);
                this.sourceReader.SetCurrentMediaType(this.imageStreamIndex, mediaType);
            }
            else if (mediaType.MajorType == MediaTypeGuids.Audio && this.audioStreamIndex == -1)
            {
                this.audioStreamIndex = streamIndex;

                // enable the stream and set the current media type to PCM
                this.sourceReader.SetStreamSelection(this.audioStreamIndex, true);
                mediaType = new MediaType();
                mediaType.Set(MediaTypeAttributeKeys.MajorType, MediaTypeGuids.Audio);
                mediaType.Set(MediaTypeAttributeKeys.Subtype, AudioFormatGuids.Pcm);
                this.sourceReader.SetCurrentMediaType(this.audioStreamIndex, mediaType);

                // get back all the media type details (channels/rate/bits are chosen
                // by the reader once PCM is requested)
                mediaType = this.sourceReader.GetCurrentMediaType(this.audioStreamIndex);
                int numberOfChannels = mediaType.Get(MediaTypeAttributeKeys.AudioNumChannels);
                int sampleRate = mediaType.Get(MediaTypeAttributeKeys.AudioSamplesPerSecond);
                int bitsPerSample = mediaType.Get(MediaTypeAttributeKeys.AudioBitsPerSample);

                // post our output audio format
                this.waveFormat = WaveFormat.CreatePcm(sampleRate, bitsPerSample, numberOfChannels);
            }
        }
        catch (Exception e)
        {
            // expected thrown exception: asking for a stream past the end throws,
            // and that is the termination condition for this loop —
            // unfortunately no way to tell how many streams other than trying
            Debug.Write(e.GetType());
            doneEnumerating = true;
        }

        streamIndex += 1;
    }
}
/// <summary>
/// Handles the "Create PCM" button: rebuilds the current format as plain PCM
/// (same sample rate, bit depth, and channel count) and refreshes the display.
/// </summary>
private void ButtonCreatePcm_Click(object sender, RoutedEventArgs e)
{
    var pcmFormat = WaveFormat.CreatePcm((int)Format.SamplesPerSec, Format.BitsPerSample, Format.Channels);
    Update(pcmFormat);
}
/// <summary>
/// Runs live conversation transcription: captures 8-channel 16 kHz PCM audio through a
/// \psi pipeline (persisting it to a store), pushes it into the Speech SDK, and reports
/// transcription events via <c>SetText</c> until the session stops or is canceled.
/// </summary>
/// <param name="voiceSignatureStringUsers">Voice signature strings, one per expected participant.</param>
/// <returns>A task that completes when transcription has been stopped.</returns>
public async Task TranscribeConversationsAsync(IEnumerable<string> voiceSignatureStringUsers)
{
    uint samplesPerSecond = 16000;
    byte bitsPerSample = 16;
    byte channels = 8; // 7 + 1 channels

    var config = SpeechConfig.FromSubscription(this.SubscriptionKey, this.Region);
    config.SetProperty("ConversationTranscriptionInRoomAndOnline", "true");

    // RunContinuationsAsynchronously so that the awaiting continuation below does not run
    // inline on the Speech SDK's event-callback thread when TrySetResult fires.
    var stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);

    using (var audioInput = AudioInputStream.CreatePushStream(AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, bitsPerSample, channels)))
    {
        var meetingID = Guid.NewGuid().ToString();
        using (var conversation = await Conversation.CreateConversationAsync(config, meetingID))
        {
            // create a conversation transcriber using audio stream input
            using (this.conversationTranscriber = new ConversationTranscriber(AudioConfig.FromStreamInput(audioInput)))
            {
                conversationTranscriber.Transcribing += (s, e) =>
                {
                    this.SetText($"TRANSCRIBING: Text={e.Result.Text} SpeakerId={e.Result.UserId}");
                };

                conversationTranscriber.Transcribed += (s, e) =>
                {
                    if (e.Result.Reason == ResultReason.RecognizedSpeech)
                    {
                        this.SetText($"TRANSCRIBED: Text={e.Result.Text} SpeakerId={e.Result.UserId}");
                    }
                    else if (e.Result.Reason == ResultReason.NoMatch)
                    {
                        this.SetText($"NOMATCH: Speech could not be recognized.");
                    }
                };

                // An error cancellation also ends the session — signal completion so we stop cleanly.
                conversationTranscriber.Canceled += (s, e) =>
                {
                    this.SetText($"CANCELED: Reason={e.Reason}");
                    if (e.Reason == CancellationReason.Error)
                    {
                        this.SetText($"CANCELED: ErrorCode={e.ErrorCode}");
                        this.SetText($"CANCELED: ErrorDetails={e.ErrorDetails}");
                        this.SetText($"CANCELED: Did you update the subscription info?");
                        stopRecognition.TrySetResult(0);
                    }
                };

                conversationTranscriber.SessionStarted += (s, e) =>
                {
                    this.SetText($"\nSession started event. SessionId={e.SessionId}");
                };

                conversationTranscriber.SessionStopped += (s, e) =>
                {
                    this.SetText($"\nSession stopped event. SessionId={e.SessionId}");
                    this.SetText("\nStop recognition.");
                    stopRecognition.TrySetResult(0);
                };

                // Add participants to the conversation.
                int i = 1;
                foreach (var voiceSignatureStringUser in voiceSignatureStringUsers)
                {
                    var speaker = Participant.From($"User{i++}", "en-US", voiceSignatureStringUser);
                    await conversation.AddParticipantAsync(speaker);
                }

                // Join to the conversation and start transcribing
                await conversationTranscriber.JoinConversationAsync(conversation);
                await conversationTranscriber.StartTranscribingAsync().ConfigureAwait(false);

                // Capture live audio, persist it to a store, and feed it to the transcriber's push stream.
                using (var p = Pipeline.Create())
                {
                    var store = PsiStore.Create(p, "Transcribe", @"D:\Temp");
                    var capture = new AudioCapture(p, WaveFormat.CreatePcm((int)samplesPerSecond, bitsPerSample, channels)).Write("Audio", store);
                    capture.Do(audio => audioInput.Write(audio.Data));
                    p.RunAsync();

                    // waits for completion (session-stopped or error-canceled), then stop transcription
                    await stopRecognition.Task;
                }

                await conversationTranscriber.StopTranscribingAsync().ConfigureAwait(false);
            }
        }
    }
}