/// <summary>
/// Load a wave file for processing.
/// </summary>
/// <param name="filePath">Waveform file path.</param>
public void Load(string filePath)
{
    _filePath = filePath;

    WaveFile sourceFile = new WaveFile();
    sourceFile.Load(filePath);

    // Only 16-bit samples are accepted here; reject anything else up front.
    if (sourceFile.Format.BitsPerSample != (int)WaveBitsPerSample.Sixteen)
    {
        string message = string.Format(
            CultureInfo.InvariantCulture,
            "Only {0} bits per sample waveform is supported. But it is {1} bits per sample of waveform file [{2}].",
            (int)WaveBitsPerSample.Sixteen,
            sourceFile.Format.BitsPerSample,
            filePath);
        throw new NotSupportedException(message);
    }

    _waveData = ArrayHelper.BinaryConvertArray(sourceFile.GetSoundData());
    _samplesPerSecond = sourceFile.Format.SamplesPerSecond;
}
/// <summary>
/// Converts the wave data into proper format.
/// </summary>
/// <param name="waveFile">The given WaveFile object in which the data will be converted.</param>
/// <param name="offset">The offset in sample count of the data will be converted.</param>
/// <param name="count">The count in sample count of the data will be converted.</param>
/// <returns>Waveform data in supported format of voice font.</returns>
private byte[] ConvertsWaveDataFormat(WaveFile waveFile, int offset, int count)
{
    // Source must always be PCM; only the TARGET format (_header) varies below.
    Debug.Assert(waveFile.Format.FormatTag == WaveFormatTag.Pcm, "The source format tag should be PCM.");

    // Output buffer sized in target bytes: count samples * target bytes per sample.
    byte[] data = new byte[count * _header.BytesPerSample];

    // Format conversion.
    if (_header.FormatCategory == WaveFormatTag.Pcm)
    {
        if (waveFile.Format.BitsPerSample == _header.BytesPerSample * 8)
        {
            // Same sample width on both sides: plain byte copy.
            // NOTE: offset/count are reused here and rescaled from samples to bytes.
            offset *= _header.BytesPerSample;
            count *= _header.BytesPerSample;
            Array.Copy(waveFile.GetSoundData(), offset, data, 0, count);
        }
        else if (_header.BytesPerSample == 1)
        {
            // Target is 8-bit PCM, source is 16-bit: shift each sample down to
            // 8 bits and re-bias from signed to unsigned (+128).
            short[] dataIn16Bits = waveFile.DataIn16Bits;
            for (int i = 0; i < data.Length; ++i)
            {
                data[i] = (byte)((dataIn16Bits[i + offset] / 256) + 128); // Convert 16-bit to 8-bit.
            }
        }
        else if (_header.BytesPerSample == 2)
        {
            // Source must be 8-bit here (the equal-width case was handled above);
            // upsampling 8-bit to 16-bit is deliberately unsupported.
            throw new NotSupportedException("It is unsupported to convert 8-bit to 16-bit");
        }
    }
    else if (_header.FormatCategory == WaveFormatTag.Mulaw)
    {
        // Mulaw target is constrained to 8 kHz, 1 byte/sample, uncompressed.
        Debug.Assert(_header.SamplesPerSecond == 8000, "Only supports 8k Hz for mulaw voice.");
        Debug.Assert(_header.BytesPerSample == 1, "Only supports 1 byte per sample for mulaw voice.");
        Debug.Assert(_header.Compression == WaveCompressCatalog.Unc, "Only supports uncompress encoding for mulaw voice.");

        if (waveFile.Format.SamplesPerSecond != 8000)
        {
            string message = Helper.NeutralFormat(
                "Samples per second [{0}] of source waveform file should be the same with that [{1}] of target voice.",
                waveFile.Format.SamplesPerSecond, _header.SamplesPerSecond);
            throw new InvalidDataException(message);
        }

        if (waveFile.Format.BitsPerSample != 16 || waveFile.Format.BlockAlign != 2)
        {
            string message = Helper.NeutralFormat(
                "Only supports 16 bits per sample and 2 bytes alignment, while that of source waveform file is [{0}] and [{1}].",
                waveFile.Format.BitsPerSample, waveFile.Format.BlockAlign);
            throw new InvalidDataException(message);
        }

        // Converts 16bits PCM samples to 8 bits Mulaw samples
        short[] soundData = waveFile.DataIn16Bits;
        for (int i = 0; i < count; i++)
        {
            data[i] = SampleConverter.LinearToUlaw(soundData[offset + i]);
        }
    }
    else
    {
        // Bug #70735 is filed to track: Currently, Compress is not supported in RUS offline inventory building.
        throw new NotSupportedException(
            Helper.NeutralFormat("Unsupported target format [{0}].", _header.FormatCategory));
    }

    return data;
}
/// <summary>
/// Writes the right margin if possible.
/// </summary>
/// <param name="waveFile">The given wave file where the current candidate belongs to.</param>
/// <param name="candidate">The current candidate.</param>
/// <param name="candidateInfo">The candidate information of the current candidate.</param>
private void WriteRightMargin(WaveFile waveFile, UnitCandidate candidate, WaveCandidateInfo candidateInfo)
{
    if (_ccMarginLength + _fsMarginLength <= 0)
    {
        // No margin configured, nothing to write.
        return;
    }

    int marginSampleCount = (_ccMarginLength / 2) + _fsMarginLength;
    int candidateSampleOffset = (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);
    int candidateSampleLength = (int)(((candidate.EndTimeInSecond - candidate.StartTimeInSecond) * waveFile.Format.SamplesPerSecond) + 0.5f);

    // Right margin section.
    bool isLastCandidate = candidate.Index == candidate.Sentence.Candidates.Count - 1;
    if (isLastCandidate)
    {
        // The candidate is the last one, there is no next candidate. So, writes some zero as margin.
        WriteZeroMargin(marginSampleCount);
    }
    else if (candidate.Sentence.Candidates[candidate.Index + 1].Id == UnitCandidate.InvalidId)
    {
        // There is a next candidate and it isn't in the inventory. So, writes the next candidate as margin.
        int marginOffset = candidateSampleOffset + candidateSampleLength;
        int availableSamples = (waveFile.GetSoundData().Length / (waveFile.Format.BitsPerSample / 8)) - marginOffset;
        if (availableSamples < marginSampleCount)
        {
            // The sentence runs out before the margin is full; pad with zeros.
            WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginOffset, availableSamples));
            WriteZeroMargin(marginSampleCount - availableSamples);
        }
        else
        {
            WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginOffset, marginSampleCount));
        }
    }

    // Otherwise the next candidate is in the inventory itself, so no explicit
    // margin needs to be written here.
}
/// <summary>
/// Extract related features from wave: zero crossing, energy, autocorrelation.
/// </summary>
/// <param name="args">Arguments: wave file, fea file, frame shift, frame length.</param>
/// <param name="logWriter">LogWriter to implement parallel computing interface.</param>
/// <exception cref="ArgumentException">Thrown when fewer than four arguments are supplied.</exception>
public static void ExtractRelatedFeaturesOneFile(string[] args, TextWriter logWriter)
{
    // check arguments. Four of them are read below, so require all four
    // (the original "< 2" check let args[2]/args[3] throw IndexOutOfRangeException).
    if (args.Length < 4)
    {
        throw new ArgumentException("Arguments for ExtractRelatedFeaturesOneFile: input wave file, output fea file, frame shift, frame length");
    }

    // check input and output file.
    string wavePath = args[0];
    string feaFile = args[1];
    int frameShift = int.Parse(args[2], CultureInfo.InvariantCulture);
    int framelength = int.Parse(args[3], CultureInfo.InvariantCulture);

    // output <zeroCrossing energy autoCorrelation>.
    using (StreamWriter sw = new StreamWriter(feaFile, false))
    {
        // load wave
        WaveFile waveFile = new WaveFile();
        waveFile.Load(wavePath);
        short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

        // calculate features, one frame at a time, until the window no longer fits.
        for (int i = 0; ; ++i)
        {
            if ((((i + 1) * frameShift) + (framelength / 2)) > waveData.Length)
            {
                break;
            }

            int nzero = 0;
            double energy = 0;
            double autoCorr = 0;
            double dsum = 0;
            double product1 = 0;
            double product2 = 0;

            // Analysis window of framelength samples centered at the end of frame i.
            int pos = (i + 1) * frameShift;
            int nbegin = pos - (framelength / 2);
            int nend = pos + (framelength / 2);
            if (nend <= waveData.Length && nbegin >= 0)
            {
                if (nbegin == 0)
                {
                    // First window: lag-1 autocorrelation needs waveData[j - 1],
                    // which would underflow at j == 0, so only zero crossings and
                    // energy are accumulated here (autoCorr stays 0).
                    int j = nbegin;
                    for (; j < nend - 1; ++j)
                    {
                        if ((waveData[j] < 0 && waveData[j + 1] > 0) ||
                            (waveData[j] > 0 && waveData[j + 1] < 0) ||
                            (waveData[j] == 0 && waveData[j + 1] != 0))
                        {
                            nzero++;
                        }

                        energy += waveData[j] * waveData[j];
                    }

                    // calculate energy (log scale; Minimum avoids log(0)).
                    energy += waveData[j] * waveData[j];
                    energy = energy / framelength;
                    energy = 10 * Math.Log(Minimum + energy);
                }
                else
                {
                    // process each frame: zero crossings, energy and the lag-1
                    // autocorrelation accumulators.
                    int j = nbegin;
                    for (; j < nend - 1; ++j)
                    {
                        if ((waveData[j] < 0 && waveData[j + 1] > 0) ||
                            (waveData[j] > 0 && waveData[j + 1] < 0) ||
                            (waveData[j] == 0 && waveData[j + 1] != 0))
                        {
                            nzero++;
                        }

                        energy += waveData[j] * waveData[j];
                        dsum += waveData[j] * waveData[j - 1];
                        product1 += waveData[j] * waveData[j];
                        product2 += waveData[j - 1] * waveData[j - 1];
                    }

                    // calculate energy (log scale; Minimum avoids log(0)).
                    energy += waveData[j] * waveData[j];
                    energy = energy / framelength;
                    energy = 10 * Math.Log(Minimum + energy);

                    // calculate normalized lag-1 auto correlation.
                    dsum += waveData[j] * waveData[j - 1];
                    product1 += waveData[j] * waveData[j];
                    product2 += waveData[j - 1] * waveData[j - 1];
                    autoCorr = dsum / Math.Sqrt(product1 * product2);
                }
            }

            sw.WriteLine("{0} {1:F6} {2:F6}", nzero, energy, autoCorr);
        }
    }
}
/// <summary>
/// Extract lpc residual error.
/// </summary>
/// <param name="args">Arguments: wave file, lpc file, lpc error file, frame shift, frame length.</param>
/// <param name="logWriter">LogWriter to implement parallel computing interface.</param>
/// <exception cref="ArgumentException">Thrown when fewer than five arguments are supplied.</exception>
public static void ExtractLpcResidualErrorOneFile(string[] args, TextWriter logWriter)
{
    // check arguments. Five of them are read below, so require all five
    // (the original "< 3" check let args[3]/args[4] throw IndexOutOfRangeException).
    if (args.Length < 5)
    {
        throw new ArgumentException("Arguments for ExtractLpcResidualErrorOneFile: input wave file, input lpc file, output lpc error file, frame shift, frame length");
    }

    // check input and output file
    string wavePath = args[0];
    string lpcFile = args[1];
    string errorFile = args[2];
    int frameShift = int.Parse(args[3], CultureInfo.InvariantCulture);
    int frameLength = int.Parse(args[4], CultureInfo.InvariantCulture);

    // Load the LPC coefficients, one whitespace-separated frame per line.
    List<double[]> lpcData = new List<double[]>();
    foreach (string line in Helper.FileLines(lpcFile))
    {
        string[] fields = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        double[] data = fields.Select(i => double.Parse(i, CultureInfo.InvariantCulture)).ToArray();
        lpcData.Add(data);
    }

    using (StreamWriter sw = new StreamWriter(errorFile, false))
    {
        // load wave
        WaveFile waveFile = new WaveFile();
        waveFile.Load(wavePath);
        short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

        // calculate residual error for each LPC frame
        for (int i = 0; i < lpcData.Count; i++)
        {
            // Analysis window of frameLength samples centered at the end of frame i.
            int pos = (i + 1) * frameShift;
            int nbegin = pos - (frameLength / 2);
            int nend = pos + (frameLength / 2);
            double energy = 0;

            // calculate actual value; windows falling outside the waveform are
            // skipped silently (no line is written for them).
            if (nend <= waveData.Length && nbegin >= 0)
            {
                for (int j = nbegin; j < nend; j++)
                {
                    energy += waveData[j] * waveData[j];
                }

                energy = energy / (double)frameLength;
                double tempt_energy = energy;
                energy = 10 * Math.Log(Minimum + energy);

                // calculate prediction value from the first LpcOrder coefficients.
                double prediction = 0;
                for (int k = 0; k < LpcOrder; k++)
                {
                    double denergy = 0;
                    for (int j = nbegin; j < nend; j++)
                    {
                        // NOTE(review): "j - k > 0" skips index 0; confirm whether
                        // "j - k >= 0" was intended.
                        if (j - k > 0)
                        {
                            denergy += waveData[j] * waveData[j - k];
                        }
                    }

                    prediction += lpcData[i][k] * (denergy / (double)frameLength);
                }

                prediction = prediction + tempt_energy;
                prediction = 10 * Math.Log(Math.Abs(prediction) + Minimum);

                // output residual error
                sw.WriteLine("{0:F6} {1:F6}", lpcData[i][0], energy - prediction);
            }
        }
    }
}
/// <summary>
/// Convert the WaveFile instance into another samples per second.
/// </summary>
/// <param name="waveFile">Waveform instance to resample.</param>
/// <param name="targetSamplesPerSecond">Samples per second of the target waveform file.</param>
public static void Resample(WaveFile waveFile, int targetSamplesPerSecond)
{
    if (waveFile == null)
    {
        throw new ArgumentNullException("waveFile");
    }

    if (waveFile.Riff == null)
    {
        // Message typo fixed: "bu null" -> "be null".
        string message = Helper.NeutralFormat("The Riff of wave file should not be null.");
        throw new ArgumentNullException("waveFile", message);
    }

    if (waveFile.DataIn16Bits == null)
    {
        // Message typo fixed: "bu null" -> "be null".
        string message = Helper.NeutralFormat("The DataIn16Bits of wave file should not be null.");
        throw new ArgumentNullException("waveFile", message);
    }

    if (waveFile.Format.BitsPerSample != SupportedBitsPerSample)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Only {0}bit waveform file supported for resampling.", SupportedBitsPerSample);
        throw new NotSupportedException(message);
    }

    if (waveFile.Format.Channels != SupportedChannels)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Only {0} channel waveform file supported for resampling.", SupportedChannels);
        throw new NotSupportedException(message);
    }

    // Do nothing if both samples per second are the same
    if (waveFile.Format.SamplesPerSecond != targetSamplesPerSecond)
    {
        // If both samples per second are not the same
        // Validate the cached data encoded in Short is in sync with the raw sound bytes.
        if (waveFile.DataIn16Bits.Length != waveFile.GetSoundData().Length / sizeof(short))
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The Data in 16 bits buffer is not updated with the sound data.");
            Debug.Assert(false, message);
            throw new InvalidDataException(message);
        }

        ResampleFilter resample = new ResampleFilter(waveFile.Format.SamplesPerSecond, targetSamplesPerSecond);

        // Re-sample
        short[] targetSamples = resample.Resample(waveFile.DataIn16Bits);

        // Update the target sound data into the WaveFile instance
        RiffChunk dataChunk = waveFile.Riff.GetChunk(Riff.IdData);
        dataChunk.SetData(ArrayHelper.BinaryConvertArray(targetSamples));

        // Update the format header to reflect the new sample rate.
        WaveFormat format = waveFile.Format;
        format.SamplesPerSecond = targetSamplesPerSecond;
        format.AverageBytesPerSecond = format.SamplesPerSecond * waveFile.Format.BitsPerSample / 8;
        waveFile.Format = format;
    }
}
/// <summary>
/// Append other wavefile instance to this instance.
/// </summary>
/// <param name="wf">Wave file.</param>
public void Append(WaveFile wf)
{
    if (wf == null)
    {
        throw new ArgumentNullException("wf");
    }

    // Lazily initialize the RIFF structure on first append, adopting the
    // appended file's format.
    if (_riff == null)
    {
        Initialze();
        Format = wf.Format;
    }

    if (!Format.Equals(wf.Format))
    {
        throw new ArgumentException(
            "Current format should not be different with the waveform file to append.", "wf");
    }

    // Find the sound data chunk, creating it when it does not exist yet.
    RiffChunk soundChunk = _riff.GetChunk(Riff.IdData);
    if (soundChunk == null)
    {
        soundChunk = new RiffChunk { Id = Riff.IdData };
        _riff.Chunks.Add(soundChunk);
    }

    soundChunk.Append(wf.GetSoundData());
}
/// <summary>
/// Merge two waveform files into 2-channel waveform file.
/// </summary>
/// <param name="leftFile">Left waveform file for left channel, i.e. first channel.</param>
/// <param name="rightFile">Right waveform file for right channel, i.e. second channel.</param>
/// <returns>Merged waveform file.</returns>
public static WaveFile MergeTwoChannels(WaveFile leftFile, WaveFile rightFile)
{
    if (leftFile == null)
    {
        throw new ArgumentNullException("leftFile");
    }

    if (rightFile == null)
    {
        throw new ArgumentNullException("rightFile");
    }

    if (leftFile.Format != rightFile.Format)
    {
        // Message typo fixed: "formant" -> "format".
        string message = string.Format(CultureInfo.InvariantCulture,
            "Both waveform files should share the same format.");
        throw new InvalidDataException(message);
    }

    if (leftFile.GetSoundData().Length != rightFile.GetSoundData().Length)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Both waveform files should have the same samples.");
        throw new InvalidDataException(message);
    }

    if (leftFile.Format.Channels != 1)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Only single channel waveform file is supported to merge.");
        throw new InvalidDataException(message);
    }

    // Build the 2-channel target format from the (shared) mono source format.
    WaveFile targetFile = new WaveFile();
    WaveFormat format = leftFile.Format;
    format.Channels = 2;
    format.AverageBytesPerSecond *= format.Channels;
    format.BlockAlign *= format.Channels;
    targetFile.Format = format;

    // Hoist loop-invariant data out of the interleaving loop; the original
    // called GetSoundData() on every iteration.
    byte[] leftData = leftFile.GetSoundData();
    byte[] rightData = rightFile.GetSoundData();
    int sourceBlockAlign = leftFile.Format.BlockAlign;

    // Interleave one source block (= one mono sample frame) from each file:
    // left block first, then right block.
    byte[] data = new byte[leftData.Length * format.Channels];
    for (int i = 0; i < leftData.Length; i += sourceBlockAlign)
    {
        Buffer.BlockCopy(leftData, i, data, i * format.Channels, sourceBlockAlign);
        Buffer.BlockCopy(rightData, i, data, (i * format.Channels) + sourceBlockAlign, sourceBlockAlign);
    }

    RiffChunk chunk = targetFile.Riff.GetChunk(Riff.IdData);
    chunk.SetData(data);

    return targetFile;
}