/// <summary>
/// Render an audio RTP packet received from a remote party.
/// </summary>
/// <param name="rtpPacket">The RTP packet containing the audio payload.</param>
private void RenderAudio(RTPPacket rtpPacket)
{
    if (_waveProvider != null)
    {
        var sample = rtpPacket.Payload;
        int payloadType = rtpPacket.Header.PayloadType;

        if (payloadType == (int)SDPMediaFormatsEnum.PCMA || payloadType == (int)SDPMediaFormatsEnum.PCMU)
        {
            // Select the G711 decoder once; the payload type cannot change within a
            // single packet, so there is no need to re-check it on every sample.
            Func<byte, short> decode = (payloadType == (int)SDPMediaFormatsEnum.PCMA) ?
                (Func<byte, short>)NAudio.Codecs.ALawDecoder.ALawToLinearSample :
                NAudio.Codecs.MuLawDecoder.MuLawToLinearSample;

            // Normalised samples (pcm / 32768, roughly [-1, 1)) for the audio scope event.
            Complex[] rawSamples = new Complex[sample.Length];

            for (int index = 0; index < sample.Length; index++)
            {
                short pcm = decode(sample[index]);

                // The wave provider expects 16 bit PCM as little endian bytes.
                byte[] pcmSample = new byte[] { (byte)(pcm & 0xFF), (byte)(pcm >> 8) };
                _waveProvider.AddSamples(pcmSample, 0, 2);

                rawSamples[index] = pcm / 32768f;
            }

            OnAudioScopeSampleReady?.Invoke(rawSamples);
        }
        else if (payloadType == (int)SDPMediaFormatsEnum.G722)
        {
            // The G722 decoder produces up to two PCM samples per encoded byte.
            short[] outBuffer = new short[sample.Length * 2]; // Decompressed PCM samples.
            int decodedSamples = _g722Decode.Decode(_g722DecodeState, outBuffer, sample, sample.Length);

            //Log.LogDebug($"g722 decode input samples {sample.Length}, decoded samples {decodedSamples}.");

            for (int i = 0; i < decodedSamples; i++)
            {
                var pcm = outBuffer[i];
                // Convert each decoded short to little endian bytes for the wave provider.
                byte[] pcmSample = new byte[] { (byte)(pcm & 0xFF), (byte)(pcm >> 8) };
                _waveProvider.AddSamples(pcmSample, 0, 2);
            }
        }
        else
        {
            Log.LogWarning("RTP packet received with unrecognised payload ID, ignoring.");
        }
    }
}
/// <summary>
/// Decodes an encoded audio sample into 16 bit signed PCM samples.
/// </summary>
/// <param name="encodedSample">The encoded audio sample to decode.</param>
/// <param name="format">The audio format the sample is encoded with.</param>
/// <returns>The decoded 16 bit signed PCM samples.</returns>
/// <exception cref="ApplicationException">Thrown if the format's codec is not supported.</exception>
public short[] DecodeAudio(byte[] encodedSample, AudioFormat format)
{
    if (format.Codec == AudioCodecsEnum.G722)
    {
        // The G722 decoder is created lazily on first use.
        if (_g722Decoder == null)
        {
            _g722Decoder = new G722Codec();
            _g722DecoderState = new G722CodecState(G722_BIT_RATE, G722Flags.None);
        }

        // G722 produces up to two PCM samples per encoded byte; trim to the count
        // the decoder actually produced.
        short[] decodedPcm = new short[encodedSample.Length * 2];
        int decodedSampleCount = _g722Decoder.Decode(_g722DecoderState, decodedPcm, encodedSample, encodedSample.Length);

        return decodedPcm.Take(decodedSampleCount).ToArray();
    }
    else if (format.Codec == AudioCodecsEnum.PCMA)
    {
        return encodedSample.Select(x => ALawDecoder.ALawToLinearSample(x)).ToArray();
    }
    else if (format.Codec == AudioCodecsEnum.PCMU)
    {
        return encodedSample.Select(x => MuLawDecoder.MuLawToLinearSample(x)).ToArray();
    }
    else if (format.Codec == AudioCodecsEnum.L16)
    {
        // Samples are on the wire as big endian. Iterating over complete pairs also
        // avoids an out of range exception on a (malformed) odd length sample.
        short[] pcm = new short[encodedSample.Length / 2];
        for (int i = 0; i < pcm.Length; i++)
        {
            pcm[i] = (short)(encodedSample[i * 2] << 8 | encodedSample[i * 2 + 1]);
        }
        return pcm;
    }
    else if (format.Codec == AudioCodecsEnum.PCM_S16LE)
    {
        // Samples are on the wire as little endian (well unlikely to be on the wire in this case but when they
        // arrive from somewhere like the SkypeBot SDK they will be in little endian format).
        short[] pcm = new short[encodedSample.Length / 2];
        for (int i = 0; i < pcm.Length; i++)
        {
            pcm[i] = (short)(encodedSample[i * 2 + 1] << 8 | encodedSample[i * 2]);
        }
        return pcm;
    }
    else
    {
        throw new ApplicationException($"Audio format {format.Codec} cannot be decoded.");
    }
}
/// <summary>
/// Event handler for receiving RTP packets from the remote party. Decodes supported
/// audio payloads (G722, PCMA, PCMU) to 16 bit little endian PCM and raises the
/// 8KHz and/or 16KHz sample-ready events, crudely resampling as required.
/// </summary>
/// <param name="mediaType">The media type of the packets. Only audio is processed.</param>
/// <param name="rtpPacket">The RTP packet with the media sample.</param>
private void RtpPacketReceived(SDPMediaTypesEnum mediaType, RTPPacket rtpPacket)
{
    if (mediaType == SDPMediaTypesEnum.audio)
    {
        // Only do the decode/resample work if at least one subscriber wants the result.
        bool wants8kSamples = OnRemoteAudioSampleReady != null;
        bool wants16kSamples = OnRemote16KHzPcmSampleReady != null;

        if (wants8kSamples || wants16kSamples)
        {
            var sample = rtpPacket.Payload;

            // NOTE(review): the SENDING format is used to choose the decoder for the
            // RECEIVED stream, i.e. this assumes the remote party sends with the same
            // codec we negotiated for sending — TODO confirm.
            if (_sendingFormat.FormatCodec == SDPMediaFormatsEnum.G722)
            {
                // G722 decodes to at most two 16KHz PCM samples per encoded byte.
                short[] decodedPcm16k = new short[sample.Length * 2];
                int decodedSampleCount = _g722Decoder.Decode(_g722DecoderState, decodedPcm16k, sample, sample.Length);

                // The decoder provides short samples but streams and devices generally seem to want
                // byte samples so convert them.
                byte[] pcm8kBuffer = (wants8kSamples) ? new byte[decodedSampleCount] : null;
                byte[] pcm16kBuffer = (wants16kSamples) ? new byte[decodedSampleCount * 2] : null;

                for (int i = 0; i < decodedSampleCount; i++)
                {
                    // Little endian bytes for this 16 bit sample.
                    var bufferSample = BitConverter.GetBytes(decodedPcm16k[i]);

                    // For 8K samples the crude re-sampling to get from 16K to 8K is to skip
                    // every second sample.
                    if (pcm8kBuffer != null && i % 2 == 0)
                    {
                        pcm8kBuffer[(i / 2) * 2] = bufferSample[0];
                        pcm8kBuffer[(i / 2) * 2 + 1] = bufferSample[1];
                    }

                    // G722 provides 16k samples.
                    if (pcm16kBuffer != null)
                    {
                        pcm16kBuffer[i * 2] = bufferSample[0];
                        pcm16kBuffer[i * 2 + 1] = bufferSample[1];
                    }
                }

                // A buffer for an event nobody subscribed to is null; the null-conditional
                // invoke means no event fires in that case.
                OnRemoteAudioSampleReady?.Invoke(pcm8kBuffer);
                OnRemote16KHzPcmSampleReady?.Invoke(pcm16kBuffer);
            }
            else if (_sendingFormat.FormatCodec == SDPMediaFormatsEnum.PCMA ||
                _sendingFormat.FormatCodec == SDPMediaFormatsEnum.PCMU)
            {
                // Pick the matching G711 companding decoder once for the whole packet.
                Func<byte, short> decode = (_sendingFormat.FormatCodec == SDPMediaFormatsEnum.PCMA) ?
                    (Func<byte, short>)ALawDecoder.ALawToLinearSample : MuLawDecoder.MuLawToLinearSample;

                // G711 is one encoded byte per 8KHz sample: 2 PCM bytes per sample at 8K,
                // 4 PCM bytes per sample when duplicated up to 16K.
                byte[] pcm8kBuffer = (wants8kSamples) ? new byte[sample.Length * 2] : null;
                byte[] pcm16kBuffer = (wants16kSamples) ? new byte[sample.Length * 4] : null;

                for (int i = 0; i < sample.Length; i++)
                {
                    var bufferSample = BitConverter.GetBytes(decode(sample[i]));

                    // G711 samples at 8KHz.
                    if (pcm8kBuffer != null)
                    {
                        pcm8kBuffer[i * 2] = bufferSample[0];
                        pcm8kBuffer[i * 2 + 1] = bufferSample[1];
                    }

                    // The crude up-sampling approach to get 16K samples from G711 is to
                    // duplicate each 8K sample.
                    // TODO: This re-sampling approach introduces artifacts. Applying a low pass
                    // filter seems to be recommended.
                    if (pcm16kBuffer != null)
                    {
                        pcm16kBuffer[i * 4] = bufferSample[0];
                        pcm16kBuffer[i * 4 + 1] = bufferSample[1];
                        pcm16kBuffer[i * 4 + 2] = bufferSample[0];
                        pcm16kBuffer[i * 4 + 3] = bufferSample[1];
                    }
                }

                OnRemoteAudioSampleReady?.Invoke(pcm8kBuffer);
                OnRemote16KHzPcmSampleReady?.Invoke(pcm16kBuffer);
            }
            else
            {
                // Ignore the sample. It's for an unsupported codec. It will be up to the application
                // to decode.
            }
        }
    }
}
/// <summary>
/// Decodes an encoded audio sample to 16 bit little endian PCM bytes at the
/// requested sampling rate, crudely resampling between 8KHz and 16KHz as required.
/// </summary>
/// <param name="encodedSample">The encoded audio sample to decode.</param>
/// <param name="codec">The codec the sample is encoded with.</param>
/// <param name="sampleRate">The desired sampling rate for the decoded PCM.</param>
/// <returns>The decoded PCM as little endian bytes, or null if <paramref name="sampleRate"/>
/// is not one of the recognised 8KHz/16KHz values.</returns>
/// <exception cref="ApplicationException">Thrown if the codec is not supported.</exception>
public byte[] DecodeAudio(byte[] encodedSample, AudioCodecsEnum codec, AudioSamplingRatesEnum sampleRate)
{
    bool wants8kSamples = sampleRate == AudioSamplingRatesEnum.Rate8KHz;
    bool wants16kSamples = sampleRate == AudioSamplingRatesEnum.Rate16KHz;

    if (codec == AudioCodecsEnum.G722)
    {
        // The G722 decoder is created lazily on first use.
        if (_g722Decoder == null)
        {
            _g722Decoder = new G722Codec();
            _g722DecoderState = new G722CodecState(G722_BIT_RATE, G722Flags.None);
        }

        // G722 decodes to at most two 16KHz PCM samples per encoded byte.
        short[] decodedPcm16k = new short[encodedSample.Length * 2];
        int decodedSampleCount = _g722Decoder.Decode(_g722DecoderState, decodedPcm16k, encodedSample, encodedSample.Length);

        // The decoder provides short samples but streams and devices generally seem to want
        // byte samples so convert them.
        byte[] pcm8kBuffer = (wants8kSamples) ? new byte[decodedSampleCount] : null;
        byte[] pcm16kBuffer = (wants16kSamples) ? new byte[decodedSampleCount * 2] : null;

        for (int i = 0; i < decodedSampleCount; i++)
        {
            var bufferSample = BitConverter.GetBytes(decodedPcm16k[i]);

            // For 8K samples the crude re-sampling to get from 16K to 8K is to skip
            // every second sample.
            if (pcm8kBuffer != null && i % 2 == 0)
            {
                pcm8kBuffer[(i / 2) * 2] = bufferSample[0];
                pcm8kBuffer[(i / 2) * 2 + 1] = bufferSample[1];
            }

            // G722 provides 16k samples.
            if (pcm16kBuffer != null)
            {
                pcm16kBuffer[i * 2] = bufferSample[0];
                pcm16kBuffer[i * 2 + 1] = bufferSample[1];
            }
        }

        return pcm8kBuffer ?? pcm16kBuffer;
    }
    else if (codec == AudioCodecsEnum.PCMA || codec == AudioCodecsEnum.PCMU)
    {
        // Pick the matching G711 companding decoder once for the whole sample.
        Func<byte, short> decode = (codec == AudioCodecsEnum.PCMA) ?
            (Func<byte, short>)ALawDecoder.ALawToLinearSample : MuLawDecoder.MuLawToLinearSample;

        // G711 is one encoded byte per 8KHz sample: 2 PCM bytes per sample at 8K,
        // 4 PCM bytes per sample when duplicated up to 16K.
        byte[] pcm8kBuffer = (wants8kSamples) ? new byte[encodedSample.Length * 2] : null;
        byte[] pcm16kBuffer = (wants16kSamples) ? new byte[encodedSample.Length * 4] : null;

        for (int i = 0; i < encodedSample.Length; i++)
        {
            var bufferSample = BitConverter.GetBytes(decode(encodedSample[i]));

            // G711 samples at 8KHz.
            if (pcm8kBuffer != null)
            {
                pcm8kBuffer[i * 2] = bufferSample[0];
                pcm8kBuffer[i * 2 + 1] = bufferSample[1];
            }

            // The crude up-sampling approach to get 16K samples from G711 is to
            // duplicate each 8K sample.
            // TODO: This re-sampling approach introduces artifacts. Applying a low pass
            // filter seems to be recommended.
            if (pcm16kBuffer != null)
            {
                pcm16kBuffer[i * 4] = bufferSample[0];
                pcm16kBuffer[i * 4 + 1] = bufferSample[1];
                pcm16kBuffer[i * 4 + 2] = bufferSample[0];
                pcm16kBuffer[i * 4 + 3] = bufferSample[1];
            }
        }

        return pcm8kBuffer ?? pcm16kBuffer;
    }
    else
    {
        throw new ApplicationException($"Audio format {codec} cannot be decoded.");
    }
}