/// <summary>
/// Transcodes the source audio to the target format and quality.
/// </summary>
/// <param name="formatType">Format to convert this audio to.</param>
/// <param name="quality">Quality of the processed output audio. For streaming formats, it can be one of the following:
/// Low (96 kbps), Medium (128 kbps), Best (192 kbps). For WAV formats, it can be one of the following:
/// Low (11kHz ADPCM), Medium (22kHz ADPCM), Best (44kHz PCM)</param>
/// <param name="saveToFile">
/// The name of the file that the converted audio should be saved into. This is used for SongContent, where
/// the audio is stored external to the XNB file. If this is null, then the converted audio is stored in
/// the Data property.
/// </param>
public void ConvertFormat(ConversionFormat formatType, ConversionQuality quality, string saveToFile)
{
    if (disposed)
        throw new ObjectDisposedException("AudioContent");

    var temporarySource = Path.GetTempFileName();
    var temporaryOutput = Path.GetTempFileName();
    try
    {
        using (var fs = new FileStream(temporarySource, FileMode.Create, FileAccess.Write))
        {
            var dataBytes = this.data.ToArray();
            fs.Write(dataBytes, 0, dataBytes.Length);
        }

        string ffmpegCodecName, ffmpegMuxerName;
        int format;
        switch (formatType)
        {
            case ConversionFormat.Adpcm:
                // ADPCM Microsoft
                ffmpegCodecName = "adpcm_ms";
                ffmpegMuxerName = "wav";
                format = 0x0002; /* WAVE_FORMAT_ADPCM */
                break;
            case ConversionFormat.Pcm:
                // PCM signed 16-bit little-endian
                ffmpegCodecName = "pcm_s16le";
                ffmpegMuxerName = "s16le";
                format = 0x0001; /* WAVE_FORMAT_PCM */
                break;
            case ConversionFormat.WindowsMedia:
                // Windows Media Audio 2
                ffmpegCodecName = "wmav2";
                ffmpegMuxerName = "asf";
                format = 0x0161; /* WAVE_FORMAT_WMAUDIO2 */
                break;
            case ConversionFormat.Xma:
                throw new NotSupportedException(
                    "XMA is not a supported encoding format. It is specific to the Xbox 360.");
            case ConversionFormat.ImaAdpcm:
                // ADPCM IMA WAV
                ffmpegCodecName = "adpcm_ima_wav";
                ffmpegMuxerName = "wav";
                format = 0x0011; /* WAVE_FORMAT_IMA_ADPCM */
                break;
            case ConversionFormat.Aac:
                // AAC (Advanced Audio Coding)
                // Requires -strict experimental
                ffmpegCodecName = "aac";
                ffmpegMuxerName = "ipod";
                format = 0x0000; /* WAVE_FORMAT_UNKNOWN */
                break;
            case ConversionFormat.Vorbis:
                // Vorbis
                ffmpegCodecName = "libvorbis";
                ffmpegMuxerName = "ogg";
                format = 0x0000; /* WAVE_FORMAT_UNKNOWN */
                break;
            default:
                // Unknown format
                throw new NotSupportedException();
        }

        string ffmpegStdout, ffmpegStderr;
        var ffmpegExitCode = ExternalTool.Run(
            "ffmpeg",
            string.Format(
                "-y -i \"{0}\" -vn -c:a {1} -b:a {2} -f:a {3} -strict experimental \"{4}\"",
                temporarySource,
                ffmpegCodecName,
                QualityToBitRate(quality),
                ffmpegMuxerName,
                temporaryOutput),
            out ffmpegStdout,
            out ffmpegStderr);
        if (ffmpegExitCode != 0)
        {
            throw new InvalidOperationException(
                "ffmpeg exited with non-zero exit code: \n" + ffmpegStdout + "\n" + ffmpegStderr);
        }

        byte[] rawData;
        using (var fs = new FileStream(temporaryOutput, FileMode.Open, FileAccess.Read))
        {
            rawData = new byte[fs.Length];
            fs.Read(rawData, 0, rawData.Length);
        }

        if (saveToFile != null)
        {
            using (var fs = new FileStream(saveToFile, FileMode.Create, FileAccess.Write))
                fs.Write(rawData, 0, rawData.Length);

            this.data = null;
        }
        else
        {
            this.data = rawData.ToList();
        }

        string ffprobeStdout, ffprobeStderr;
        var ffprobeExitCode = ExternalTool.Run(
            "ffprobe",
            string.Format("-i \"{0}\" -show_entries streams -v quiet -of flat", temporarySource),
            out ffprobeStdout,
            out ffprobeStderr);
        if (ffprobeExitCode != 0)
            throw new InvalidOperationException("ffprobe exited with non-zero exit code.");

        // Set default values if information is not available.
        int averageBytesPerSecond = 0;
        int bitsPerSample = 0;
        int blockAlign = 0;
        int channelCount = 0;
        int sampleRate = 0;
        double durationInSeconds = 0;

        var numberFormat = System.Globalization.CultureInfo.InvariantCulture.NumberFormat;
        foreach (var line in ffprobeStdout.Split(new[] { '\r', '\n', '\0' }, StringSplitOptions.RemoveEmptyEntries))
        {
            var kv = line.Split(new[] { '=' }, 2);

            switch (kv[0])
            {
                case "streams.stream.0.sample_rate":
                    sampleRate = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.bits_per_sample":
                    bitsPerSample = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.duration":
                    durationInSeconds = double.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.channels":
                    channelCount = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
            }
        }

        // This information is not available from ffprobe (and may or may not
        // be relevant for non-PCM formats anyway):
        //
        // * averageBytesPerSecond
        // * blockAlign

        this.duration = TimeSpan.FromSeconds(durationInSeconds);
        this.format = new AudioFormat(
            averageBytesPerSecond,
            bitsPerSample,
            blockAlign,
            channelCount,
            format,
            sampleRate);
    }
    finally
    {
        File.Delete(temporarySource);
        File.Delete(temporaryOutput);
    }
}
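// ConvertFormat above passes QualityToBitRate(quality) to ffmpeg's -b:a option, but that helper
// is not shown in this listing. A minimal sketch, assuming it simply maps ConversionQuality to
// the bit rates named in the <param name="quality"> doc comment (Low = 96 kbps, Medium = 128 kbps,
// Best = 192 kbps); the real helper may differ.
static int QualityToBitRate(ConversionQuality quality)
{
    switch (quality)
    {
        case ConversionQuality.Low:
            return 96000;
        case ConversionQuality.Medium:
            return 128000;
        default:
            return 192000; // ConversionQuality.Best
    }
}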
/// <summary>
/// Read an audio file.
/// </summary>
void Read()
{
#if WINDOWS
    reader = new MediaFoundationReader(fileName);
    duration = reader.TotalTime;
    format = new AudioFormat(reader.WaveFormat);

    var bytes = new byte[reader.Length];
    var read = reader.Read(bytes, 0, bytes.Length);
    data = new List<byte>(bytes);
#else
    throw new NotImplementedException();
#endif
}
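// Read() above issues a single reader.Read call, but stream readers are not required to fill
// the buffer in one call. A small helper (a sketch, not part of the original code) that loops
// until the requested number of bytes has been read or the stream ends:
static byte[] ReadAllBytes(System.IO.Stream source, long length)
{
    var bytes = new byte[length];
    int offset = 0;
    while (offset < bytes.Length)
    {
        int read = source.Read(bytes, offset, bytes.Length - offset);
        if (read == 0)
            break; // end of stream reached early
        offset += read;
    }
    return bytes;
}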
/// <summary>
/// Transcodes the source audio to the target format and quality.
/// </summary>
/// <param name="formatType">Format to convert this audio to.</param>
/// <param name="quality">Quality of the processed output audio. For streaming formats, it can be one of the following:
/// Low (96 kbps), Medium (128 kbps), Best (192 kbps). For WAV formats, it can be one of the following:
/// Low (11kHz ADPCM), Medium (22kHz ADPCM), Best (44kHz PCM)</param>
/// <param name="saveToFile">
/// The name of the file that the converted audio should be saved into. This is used for SongContent, where
/// the audio is stored external to the XNB file. If this is null, then the converted audio is stored in
/// the Data property.
/// </param>
public void ConvertFormat(ConversionFormat formatType, ConversionQuality quality, string saveToFile)
{
    var temporarySource = Path.GetTempFileName();
    var temporaryOutput = Path.GetTempFileName();
    try
    {
        using (var fs = new FileStream(temporarySource, FileMode.Create, FileAccess.Write))
        {
            var dataBytes = this.data.ToArray();
            fs.Write(dataBytes, 0, dataBytes.Length);
        }

        string ffmpegCodecName, ffmpegMuxerName;
        int format;
        switch (formatType)
        {
            case ConversionFormat.Adpcm:
                // ADPCM Microsoft
                ffmpegCodecName = "adpcm_ms";
                ffmpegMuxerName = "wav";
                format = 0x0002; /* WAVE_FORMAT_ADPCM */
                break;
            case ConversionFormat.Pcm:
                // PCM signed 16-bit little-endian
                ffmpegCodecName = "pcm_s16le";
                ffmpegMuxerName = "wav";
                format = 0x0001; /* WAVE_FORMAT_PCM */
                break;
            case ConversionFormat.WindowsMedia:
                // Windows Media Audio 2
                ffmpegCodecName = "wmav2";
                ffmpegMuxerName = "asf";
                format = 0x0161; /* WAVE_FORMAT_WMAUDIO2 */
                break;
            case ConversionFormat.Xma:
                throw new NotSupportedException(
                    "XMA is not a supported encoding format. It is specific to the Xbox 360.");
            case ConversionFormat.ImaAdpcm:
                // ADPCM IMA WAV
                ffmpegCodecName = "adpcm_ima_wav";
                ffmpegMuxerName = "wav";
                format = 0x0011; /* WAVE_FORMAT_IMA_ADPCM */
                break;
            case ConversionFormat.Aac:
                // AAC (Advanced Audio Coding)
                // Requires -strict experimental
                ffmpegCodecName = "aac";
                ffmpegMuxerName = "ipod";
                format = 0x0000; /* WAVE_FORMAT_UNKNOWN */
                break;
            case ConversionFormat.Vorbis:
                // Vorbis
                ffmpegCodecName = "libvorbis";
                ffmpegMuxerName = "ogg";
                format = 0x0000; /* WAVE_FORMAT_UNKNOWN */
                break;
            default:
                // Unknown format
                throw new NotSupportedException();
        }

        string ffmpegStdout, ffmpegStderr;
        var ffmpegExitCode = ExternalTool.Run(
            "ffmpeg",
            string.Format(
                "-y -i \"{0}\" -vn -c:a {1} -b:a {2} -f:a {3} -strict experimental \"{4}\"",
                temporarySource,
                ffmpegCodecName,
                QualityToBitRate(quality),
                ffmpegMuxerName,
                temporaryOutput),
            out ffmpegStdout,
            out ffmpegStderr);
        if (ffmpegExitCode != 0)
        {
            throw new InvalidOperationException(
                "ffmpeg exited with non-zero exit code: \n" + ffmpegStdout + "\n" + ffmpegStderr);
        }

        byte[] rawData;
        using (var fs = new FileStream(temporaryOutput, FileMode.Open, FileAccess.Read))
        {
            rawData = new byte[fs.Length];
            fs.Read(rawData, 0, rawData.Length);
        }

        if (saveToFile != null)
        {
            using (var fs = new FileStream(saveToFile, FileMode.Create, FileAccess.Write))
                fs.Write(rawData, 0, rawData.Length);

            this.data = null;
        }
        else
        {
            this.data = rawData.ToList();
        }

        // Get the audio metadata from the output file
        string ffprobeStdout, ffprobeStderr;
        var ffprobeExitCode = ExternalTool.Run(
            "ffprobe",
            string.Format("-i \"{0}\" -show_entries streams -v quiet -of flat", temporaryOutput),
            out ffprobeStdout,
            out ffprobeStderr);
        if (ffprobeExitCode != 0)
            throw new InvalidOperationException("ffprobe exited with non-zero exit code.");

        // Set default values if information is not available.
        int averageBytesPerSecond = 0;
        int bitsPerSample = 0;
        int blockAlign = 0;
        int channelCount = 0;
        int sampleRate = 0;
        double durationInSeconds = 0;

        var numberFormat = System.Globalization.CultureInfo.InvariantCulture.NumberFormat;
        foreach (var line in ffprobeStdout.Split(new[] { '\r', '\n', '\0' }, StringSplitOptions.RemoveEmptyEntries))
        {
            var kv = line.Split(new[] { '=' }, 2);

            switch (kv[0])
            {
                case "streams.stream.0.sample_rate":
                    sampleRate = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.bits_per_sample":
                    bitsPerSample = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.duration":
                    durationInSeconds = double.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.channels":
                    channelCount = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.bit_rate":
                    averageBytesPerSecond = int.Parse(kv[1].Trim('"'), numberFormat) / 8;
                    break;
            }
        }

        // Calculate blockAlign.
        switch (formatType)
        {
            case ConversionFormat.Adpcm:
            case ConversionFormat.ImaAdpcm:
            case ConversionFormat.Pcm:
                // Block alignment value is the number of bytes in an atomic unit (that is, a block) of audio
                // for a particular format. For Pulse Code Modulation (PCM) formats, the formula for calculating
                // block alignment is as follows:
                //   Block Alignment = Bytes per Sample x Number of Channels
                // For example, the block alignment value for 16-bit PCM format mono audio is 2 (2 bytes per
                // sample x 1 channel). For 16-bit PCM format stereo audio, the block alignment value is 4.
                // https://msdn.microsoft.com/en-us/library/system.speech.audioformat.speechaudioformatinfo.blockalign(v=vs.110).aspx

                // Get the raw PCM from the output WAV file
                using (var reader = new BinaryReader(new MemoryStream(rawData)))
                    data = GetRawWavData(reader, ref blockAlign).ToList();
                break;
            default:
                // blockAlign is not available from ffprobe (and may or may not
                // be relevant for non-PCM formats anyway)
                break;
        }

        this.duration = TimeSpan.FromSeconds(durationInSeconds);
        this.format = new AudioFormat(
            averageBytesPerSecond,
            bitsPerSample,
            blockAlign,
            channelCount,
            format,
            sampleRate);

        // Loop start and length in number of samples. Defaults to entire sound.
        loopStart = 0;
        if (data != null && bitsPerSample > 0 && channelCount > 0)
            loopLength = data.Count / ((bitsPerSample / 8) * channelCount);
        else
            loopLength = 0;
    }
    finally
    {
        File.Delete(temporarySource);
        File.Delete(temporaryOutput);
    }
}
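// GetRawWavData is called above but not shown in this listing. A minimal sketch of what such a
// helper could do, assuming a standard RIFF/WAVE layout: walk the chunks, pull nBlockAlign out
// of the "fmt " chunk, and return the contents of the "data" chunk. The actual implementation
// may handle more cases (extensible fmt headers, LIST chunks, padding, etc.).
static byte[] GetRawWavData(BinaryReader reader, ref int blockAlign)
{
    // RIFF header: "RIFF", overall size, "WAVE".
    reader.ReadBytes(4);    // "RIFF"
    reader.ReadInt32();     // overall size
    reader.ReadBytes(4);    // "WAVE"

    byte[] rawData = null;
    while (reader.BaseStream.Position < reader.BaseStream.Length)
    {
        var chunkId = new string(reader.ReadChars(4));
        var chunkSize = reader.ReadInt32();

        if (chunkId == "fmt ")
        {
            var fmt = reader.ReadBytes(chunkSize);
            // wFormatTag(2) + nChannels(2) + nSamplesPerSec(4) + nAvgBytesPerSec(4) = 12 bytes,
            // so nBlockAlign is the 16-bit value at offset 12 within the fmt chunk.
            blockAlign = BitConverter.ToInt16(fmt, 12);
        }
        else if (chunkId == "data")
        {
            rawData = reader.ReadBytes(chunkSize);
        }
        else
        {
            // Skip any other chunk ("fact", "LIST", ...).
            reader.BaseStream.Seek(chunkSize, SeekOrigin.Current);
        }

        // Chunks are word-aligned; skip the pad byte after odd-sized chunks.
        if ((chunkSize & 1) == 1 && reader.BaseStream.Position < reader.BaseStream.Length)
            reader.BaseStream.Seek(1, SeekOrigin.Current);
    }

    return rawData ?? new byte[0];
}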
public static void ProbeFormat(string sourceFile, out AudioFileType audioFileType, out AudioFormat audioFormat,
                               out TimeSpan duration, out int loopStart, out int loopLength)
{
    string ffprobeStdout, ffprobeStderr;
    var ffprobeExitCode = ExternalTool.Run(
        "ffprobe",
        string.Format("-i \"{0}\" -show_format -show_entries streams -v quiet -of flat", sourceFile),
        out ffprobeStdout,
        out ffprobeStderr);
    if (ffprobeExitCode != 0)
        throw new InvalidOperationException("ffprobe exited with non-zero exit code.");

    // Set default values if information is not available.
    int averageBytesPerSecond = 0;
    int bitsPerSample = 0;
    int blockAlign = 0;
    int channelCount = 0;
    int sampleRate = 0;
    int format = 0;
    string sampleFormat = null;
    double durationInSeconds = 0;
    var formatName = string.Empty;

    try
    {
        var numberFormat = CultureInfo.InvariantCulture.NumberFormat;
        foreach (var line in ffprobeStdout.Split(new[] { '\r', '\n', '\0' }, StringSplitOptions.RemoveEmptyEntries))
        {
            var kv = line.Split(new[] { '=' }, 2);

            switch (kv[0])
            {
                case "streams.stream.0.sample_rate":
                    sampleRate = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.bits_per_sample":
                    bitsPerSample = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.start_time":
                {
                    double seconds;
                    if (double.TryParse(kv[1].Trim('"'), NumberStyles.Any, numberFormat, out seconds))
                        durationInSeconds += seconds;
                    break;
                }
                case "streams.stream.0.duration":
                    durationInSeconds += double.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.channels":
                    channelCount = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.sample_fmt":
                    sampleFormat = kv[1].Trim('"').ToLowerInvariant();
                    break;
                case "streams.stream.0.bit_rate":
                    averageBytesPerSecond = int.Parse(kv[1].Trim('"'), numberFormat) / 8;
                    break;
                case "format.format_name":
                    formatName = kv[1].Trim('"').ToLowerInvariant();
                    break;
                case "streams.stream.0.codec_tag":
                {
                    var hex = kv[1].Substring(3, kv[1].Length - 4);
                    format = int.Parse(hex, NumberStyles.HexNumber);
                    break;
                }
            }
        }
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException("Failed to parse ffprobe output.", ex);
    }

    // XNA seems to use the sample format for the bits per sample
    // in the case of non-PCM formats like MP3 and WMA.
    if (bitsPerSample == 0 && sampleFormat != null)
    {
        switch (sampleFormat)
        {
            case "u8":
            case "u8p":
                bitsPerSample = 8;
                break;
            case "s16":
            case "s16p":
                bitsPerSample = 16;
                break;
            case "s32":
            case "s32p":
            case "flt":
            case "fltp":
                bitsPerSample = 32;
                break;
            case "dbl":
            case "dblp":
                bitsPerSample = 64;
                break;
        }
    }

    // Figure out the file type.
    var durationMs = (int)Math.Floor(durationInSeconds * 1000.0);
    if (formatName == "wav")
    {
        audioFileType = AudioFileType.Wav;
    }
    else if (formatName == "mp3")
    {
        audioFileType = AudioFileType.Mp3;
        format = 1;
        durationMs = (int)Math.Ceiling(durationInSeconds * 1000.0);
        bitsPerSample = Math.Min(bitsPerSample, 16);
    }
    else if (formatName == "wma" || formatName == "asf")
    {
        audioFileType = AudioFileType.Wma;
        format = 1;
        durationMs = (int)Math.Ceiling(durationInSeconds * 1000.0);
        bitsPerSample = Math.Min(bitsPerSample, 16);
    }
    else if (formatName == "ogg")
    {
        audioFileType = AudioFileType.Ogg;
        format = 1;
        durationMs = (int)Math.Ceiling(durationInSeconds * 1000.0);
        bitsPerSample = Math.Min(bitsPerSample, 16);
    }
    else
    {
        audioFileType = (AudioFileType)(-1);
    }

    // XNA seems to calculate the block alignment directly from
    // the bits per sample and channel count regardless of the
    // format of the audio data.
    // ffprobe doesn't report blockAlign for ADPCM and we cannot calculate it like this.
    if (bitsPerSample > 0 && (format != 2 && format != 17))
        blockAlign = (bitsPerSample * channelCount) / 8;

    // XNA seems to only be accurate to the millisecond.
    duration = TimeSpan.FromMilliseconds(durationMs);

    // Looks like XNA calculates the average bps from
    // the sample rate and block alignment.
    if (blockAlign > 0)
        averageBytesPerSecond = sampleRate * blockAlign;

    audioFormat = new AudioFormat(
        averageBytesPerSecond,
        bitsPerSample,
        blockAlign,
        channelCount,
        format,
        sampleRate);

    // Loop start and length in number of samples. For some
    // reason XNA doesn't report loop length for non-WAV sources.
    loopStart = 0;
    if (audioFileType != AudioFileType.Wav)
        loopLength = 0;
    else
        loopLength = (int)Math.Floor(sampleRate * durationInSeconds);
}
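// An illustrative call of ProbeFormat (the file name is hypothetical). ffprobe's "-of flat"
// writer emits one key=value pair per line, which the parser above matches by key; the lines
// look roughly like this (illustrative values, exact quoting may vary by ffprobe version):
//   streams.stream.0.sample_rate="44100"
//   streams.stream.0.channels=2
//   streams.stream.0.duration="10.000000"
//   format.format_name="wav"
// For a 16-bit stereo 44.1 kHz WAV those values yield blockAlign = (16 * 2) / 8 = 4,
// averageBytesPerSecond = 44100 * 4 = 176400, and loopLength = floor(44100 * 10) samples.
AudioFileType probedFileType;
AudioFormat probedFormat;
TimeSpan probedDuration;
int probedLoopStart, probedLoopLength;
ProbeFormat("test.wav", out probedFileType, out probedFormat, out probedDuration,
            out probedLoopStart, out probedLoopLength);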
/// <summary>
/// Transcodes the source audio to the target format and quality.
/// </summary>
/// <param name="formatType">Format to convert this audio to.</param>
/// <param name="quality">Quality of the processed output audio. For streaming formats, it can be one of the following:
/// Low (96 kbps), Medium (128 kbps), Best (192 kbps). For WAV formats, it can be one of the following:
/// Low (11kHz ADPCM), Medium (22kHz ADPCM), Best (44kHz PCM)</param>
/// <param name="saveToFile">
/// The name of the file that the converted audio should be saved into. This is used for SongContent, where
/// the audio is stored external to the XNB file. If this is null, then the converted audio is stored in
/// the Data property.
/// </param>
public void ConvertFormat(ConversionFormat formatType, ConversionQuality quality, string saveToFile)
{
    var temporarySource = Path.GetTempFileName();
    var temporaryOutput = Path.GetTempFileName();
    try
    {
        using (var fs = new FileStream(temporarySource, FileMode.Create, FileAccess.Write))
        {
            var dataBytes = this.data.ToArray();
            fs.Write(dataBytes, 0, dataBytes.Length);
        }

        string ffmpegCodecName, ffmpegMuxerName;
        int format;
        switch (formatType)
        {
            case ConversionFormat.Adpcm:
                // ADPCM Microsoft
                ffmpegCodecName = "adpcm_ms";
                ffmpegMuxerName = "wav";
                format = 0x0002; /* WAVE_FORMAT_ADPCM */
                break;
            case ConversionFormat.Pcm:
                // PCM signed 16-bit little-endian
                ffmpegCodecName = "pcm_s16le";
                ffmpegMuxerName = "s16le";
                format = 0x0001; /* WAVE_FORMAT_PCM */
                break;
            case ConversionFormat.WindowsMedia:
                // Windows Media Audio 2
                ffmpegCodecName = "wmav2";
                ffmpegMuxerName = "asf";
                format = 0x0161; /* WAVE_FORMAT_WMAUDIO2 */
                break;
            case ConversionFormat.Xma:
                throw new NotSupportedException(
                    "XMA is not a supported encoding format. It is specific to the Xbox 360.");
            case ConversionFormat.ImaAdpcm:
                // ADPCM IMA WAV
                ffmpegCodecName = "adpcm_ima_wav";
                ffmpegMuxerName = "wav";
                format = 0x0011; /* WAVE_FORMAT_IMA_ADPCM */
                break;
            case ConversionFormat.Aac:
                // AAC (Advanced Audio Coding)
                // Requires -strict experimental
                ffmpegCodecName = "aac";
                ffmpegMuxerName = "ipod";
                format = 0x0000; /* WAVE_FORMAT_UNKNOWN */
                break;
            case ConversionFormat.Vorbis:
                // Vorbis
                ffmpegCodecName = "libvorbis";
                ffmpegMuxerName = "ogg";
                format = 0x0000; /* WAVE_FORMAT_UNKNOWN */
                break;
            default:
                // Unknown format
                throw new NotSupportedException();
        }

        string ffmpegStdout, ffmpegStderr;
        var ffmpegExitCode = ExternalTool.Run(
            "ffmpeg",
            string.Format(
                "-y -i \"{0}\" -vn -c:a {1} -b:a {2} -f:a {3} -strict experimental \"{4}\"",
                temporarySource,
                ffmpegCodecName,
                QualityToBitRate(quality),
                ffmpegMuxerName,
                temporaryOutput),
            out ffmpegStdout,
            out ffmpegStderr);
        if (ffmpegExitCode != 0)
        {
            throw new InvalidOperationException(
                "ffmpeg exited with non-zero exit code: \n" + ffmpegStdout + "\n" + ffmpegStderr);
        }

        byte[] rawData;
        using (var fs = new FileStream(temporaryOutput, FileMode.Open, FileAccess.Read))
        {
            rawData = new byte[fs.Length];
            fs.Read(rawData, 0, rawData.Length);
        }

        if (saveToFile != null)
        {
            using (var fs = new FileStream(saveToFile, FileMode.Create, FileAccess.Write))
                fs.Write(rawData, 0, rawData.Length);

            this.data = null;
        }
        else
        {
            this.data = rawData.ToList();
        }

        string ffprobeStdout, ffprobeStderr;
        var ffprobeExitCode = ExternalTool.Run(
            "ffprobe",
            string.Format("-i \"{0}\" -show_entries streams -v quiet -of flat", temporarySource),
            out ffprobeStdout,
            out ffprobeStderr);
        if (ffprobeExitCode != 0)
            throw new InvalidOperationException("ffprobe exited with non-zero exit code.");

        // Set default values if information is not available.
        int averageBytesPerSecond = 0;
        int bitsPerSample = 0;
        int blockAlign = 0;
        int channelCount = 0;
        int sampleRate = 0;
        double durationInSeconds = 0;

        var numberFormat = System.Globalization.CultureInfo.InvariantCulture.NumberFormat;
        foreach (var line in ffprobeStdout.Split(new[] { '\r', '\n', '\0' }, StringSplitOptions.RemoveEmptyEntries))
        {
            var kv = line.Split(new[] { '=' }, 2);

            switch (kv[0])
            {
                case "streams.stream.0.sample_rate":
                    sampleRate = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.bits_per_sample":
                    bitsPerSample = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.duration":
                    durationInSeconds = double.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.channels":
                    channelCount = int.Parse(kv[1].Trim('"'), numberFormat);
                    break;
                case "streams.stream.0.bit_rate":
                    averageBytesPerSecond = int.Parse(kv[1].Trim('"'), numberFormat) / 8;
                    break;
            }
        }

        // Calculate blockAlign.
        switch (formatType)
        {
            case ConversionFormat.Pcm:
                // Block alignment value is the number of bytes in an atomic unit (that is, a block) of audio
                // for a particular format. For Pulse Code Modulation (PCM) formats, the formula for calculating
                // block alignment is as follows:
                //   Block Alignment = Bytes per Sample x Number of Channels
                // For example, the block alignment value for 16-bit PCM format mono audio is 2 (2 bytes per
                // sample x 1 channel). For 16-bit PCM format stereo audio, the block alignment value is 4.
                // https://msdn.microsoft.com/en-us/library/system.speech.audioformat.speechaudioformatinfo.blockalign(v=vs.110).aspx
                blockAlign = (bitsPerSample / 8) * channelCount;
                break;
            default:
                // blockAlign is not available from ffprobe (and may or may not
                // be relevant for non-PCM formats anyway)
                break;
        }

        this.duration = TimeSpan.FromSeconds(durationInSeconds);
        this.format = new AudioFormat(
            averageBytesPerSecond,
            bitsPerSample,
            blockAlign,
            channelCount,
            format,
            sampleRate);

        // Loop start and length in number of samples. Defaults to entire sound.
        loopStart = 0;
        if (data != null && bitsPerSample > 0 && channelCount > 0)
            loopLength = data.Count / ((bitsPerSample / 8) * channelCount);
        else
            loopLength = 0;
    }
    finally
    {
        File.Delete(temporarySource);
        File.Delete(temporaryOutput);
    }
}
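// A worked example of the PCM bookkeeping above, using illustrative numbers rather than
// anything taken from a real conversion: 16-bit stereo PCM at 44.1 kHz for 10 seconds.
int exampleBits = 16, exampleChannels = 2, exampleSampleRate = 44100;
int exampleBlockAlign = (exampleBits / 8) * exampleChannels;                 // 4 bytes per sample frame
int exampleDataLength = exampleSampleRate * 10 * exampleBlockAlign;          // 1,764,000 bytes of PCM data
int exampleLoopLength = exampleDataLength / exampleBlockAlign;               // 441,000 samples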
/// <summary>
/// Converts the audio using the specified wave format.
/// </summary>
/// <param name="waveFormat">The WaveFormat to use for the conversion.</param>
void ConvertWav(WaveFormat waveFormat)
{
    reader.Position = 0;

#if WINDOWS
    //var mediaTypes = MediaFoundationEncoder.GetOutputMediaTypes(NAudio.MediaFoundation.AudioSubtypes.MFAudioFormat_PCM);
    using (var resampler = new MediaFoundationResampler(reader, waveFormat))
    using (var outStream = new MemoryStream())
    {
        // Since we cannot determine ahead of time the number of bytes to be
        // read, read four seconds worth at a time.
        byte[] bytes = new byte[reader.WaveFormat.AverageBytesPerSecond * 4];
        while (true)
        {
            int bytesRead = resampler.Read(bytes, 0, bytes.Length);
            if (bytesRead == 0)
                break;
            outStream.Write(bytes, 0, bytesRead);
        }

        data = new List<byte>(outStream.ToArray());
        format = new AudioFormat(waveFormat);
    }
#else
    throw new NotImplementedException();
#endif
}
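// An illustrative use of ConvertWav. The target format values here are assumptions chosen only
// to match the 22 kHz sample rate mentioned in the ConvertFormat quality doc comment; NAudio's
// WaveFormat(int rate, int bits, int channels) constructor produces a 16-bit PCM format.
var targetFormat = new WaveFormat(22050, 16, 1); // 22.05 kHz, 16-bit, mono PCM
ConvertWav(targetFormat);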