/// <summary>
/// Loads a waveform file and caches its sample data and sampling rate.
/// </summary>
/// <param name="filePath">Path of the waveform file to load.</param>
/// <exception cref="NotSupportedException">
/// The waveform is not stored with 16 bits per sample.
/// </exception>
public void Load(string filePath)
{
    // The path is recorded first, so it is kept even when loading fails below.
    _filePath = filePath;

    WaveFile source = new WaveFile();
    source.Load(filePath);

    // Only 16-bit waveforms are accepted; anything else is rejected up front.
    if (source.Format.BitsPerSample != (int)WaveBitsPerSample.Sixteen)
    {
        throw new NotSupportedException(string.Format(
            CultureInfo.InvariantCulture,
            "Only {0} bits per sample waveform is supported. But it is {1} bits per sample of waveform file [{2}].",
            (int)WaveBitsPerSample.Sixteen,
            source.Format.BitsPerSample,
            filePath));
    }

    _waveData = ArrayHelper.BinaryConvertArray(source.GetSoundData());
    _samplesPerSecond = source.Format.SamplesPerSecond;
}
/// <summary>
/// Surrounds every waveform file of the source directory with an affixing
/// waveform (affix + source + affix) and saves the result under the target directory.
/// Target files that already exist are left untouched.
/// </summary>
/// <param name="sourceWaveDir">Directory holding the source waveform files.</param>
/// <param name="targetWaveDir">Directory receiving the affixed waveform files.</param>
/// <param name="affixingFile">Waveform placed before and after each source waveform.</param>
private static void AffixWaveFiles(string sourceWaveDir, string targetWaveDir,
    WaveFile affixingFile)
{
    Dictionary<string, string> sourceFiles =
        Microsoft.Tts.Offline.FileListMap.Build(sourceWaveDir, ".wav");

    foreach (KeyValuePair<string, string> entry in sourceFiles)
    {
        // The mapped value is the file name without extension, shared by source and target.
        string waveFileName = entry.Value + ".wav";

        string targetPath = Path.Combine(targetWaveDir, waveFileName);
        if (File.Exists(targetPath))
        {
            // Already produced; skip it.
            continue;
        }

        string sourcePath = Path.Combine(sourceWaveDir, waveFileName);
        Helper.EnsureFolderExistForFile(targetPath);

        WaveFile original = new WaveFile();
        original.Load(sourcePath);

        WaveFile affixed = new WaveFile();
        affixed.Append(affixingFile);
        affixed.Append(original);
        affixed.Append(affixingFile);
        affixed.Save(targetPath);
    }
}
/// <summary>
/// Clears the attached waveform and all view state, then requests a repaint.
/// </summary>
public void Reset()
{
    // Detach the waveform.
    _waveFile = null;

    // Return the sample window to its initial values.
    _sampleLength = 0;
    _sampleOffset = 0;

    // Clear the moving-mode flag and the accumulated shift.
    _cumulateShift = 0;
    _movingMode = false;

    Invalidate();
}
/// <summary>
/// Saves floating-point samples as a single channel, 16-bit PCM waveform file.
/// Intended mostly for debugging: the written file has as many samples as
/// <paramref name="outWave"/>.
/// </summary>
/// <param name="filePath">Path of the waveform file to write.</param>
/// <param name="outWave">Samples to save; converted to 16-bit integers before writing.</param>
/// <param name="samplesPerSecond">Sampling rate stored in the waveform header.</param>
public static void WriteWaveFile(string filePath, float[] outWave, int samplesPerSecond)
{
    short[] pcmSamples = ArrayHelper.ToInt16<float>(outWave);

    // Mono, 2 bytes per sample, so the byte rate is twice the sampling rate.
    WaveFormat pcmFormat = new WaveFormat
    {
        Channels = 1,
        BlockAlign = 2,
        BitsPerSample = 16,
        ExtSize = 0,
        FormatTag = WaveFormatTag.Pcm,
        SamplesPerSecond = samplesPerSecond,
        AverageBytesPerSecond = checked(samplesPerSecond * 2),
    };

    WaveFile target = new WaveFile();
    target.Format = pcmFormat;

    RiffChunk dataChunk = target.Riff.GetChunk(Riff.IdData);
    dataChunk.SetData(ArrayHelper.BinaryConvertArray(pcmSamples));

    // Keep the declared chunk size in sync with the stored payload.
    dataChunk.Size = dataChunk.GetData().Length;

    target.Save(filePath);
}
/// <summary>
/// Writes the right margin of a candidate into the inventory, when margins are enabled.
/// </summary>
/// <remarks>
/// The last candidate of a sentence gets a zero-filled margin. Otherwise a margin is
/// written only when the next candidate is not in the inventory: the samples following
/// the current candidate are used, padded with zeros when the waveform ends too early.
/// </remarks>
/// <param name="waveFile">Waveform file the candidate belongs to.</param>
/// <param name="candidate">Candidate whose right margin is written.</param>
/// <param name="candidateInfo">Candidate information of the candidate (not used here).</param>
private void WriteRightMargin(WaveFile waveFile, UnitCandidate candidate,
    WaveCandidateInfo candidateInfo)
{
    if (_ccMarginLength + _fsMarginLength <= 0)
    {
        // No margin configured.
        return;
    }

    int marginLength = (_ccMarginLength / 2) + _fsMarginLength;
    int candidateOffset =
        (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);
    int candidateLength =
        (int)(((candidate.EndTimeInSecond - candidate.StartTimeInSecond) * waveFile.Format.SamplesPerSecond) + 0.5f);

    if (candidate.Index == candidate.Sentence.Candidates.Count - 1)
    {
        // Last candidate of the sentence: nothing follows, so pad with zeros.
        WriteZeroMargin(marginLength);
        return;
    }

    if (candidate.Sentence.Candidates[candidate.Index + 1].Id != UnitCandidate.InvalidId)
    {
        // The next candidate is in the inventory; no margin is written in this case.
        return;
    }

    // The next candidate is not in the inventory, so its samples serve as margin.
    int marginOffset = (int)(candidateOffset + candidateLength);
    int samplesLeft =
        (waveFile.GetSoundData().Length / (waveFile.Format.BitsPerSample / 8)) - marginOffset;

    if (samplesLeft < marginLength)
    {
        // Not enough samples remain: write what exists and fill the rest with zeros.
        WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginOffset, samplesLeft));
        WriteZeroMargin(marginLength - samplesLeft);
    }
    else
    {
        WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginOffset, marginLength));
    }
}
/// <summary>
/// Adds the waveform data of every candidate of a sentence into the wave inventory,
/// and registers indexing information for the candidates that have a valid id.
/// </summary>
/// <param name="sentence">Sentence whose candidates are added.</param>
/// <param name="waveFile">Waveform file of the sentence.</param>
/// <exception cref="InvalidDataException">
/// A candidate is longer than <see cref="ushort.MaxValue"/> samples, or its expected byte
/// position does not match the current write position of the inventory.
/// </exception>
private void Add(Sentence sentence, WaveFile waveFile)
{
    Debug.Assert(
        waveFile.Format.SamplesPerSecond == _header.SamplesPerSecond &&
            waveFile.Format.Channels == 1 &&
            waveFile.Format.FormatTag == WaveFormatTag.Pcm,
        "Only supports source waveform with single channel, PCM and same sampling rate.");

    // Design note: the wave data of pruned candidates is written as well. Skipping it
    // introduced bugs with the current frame shifting design, so all wave data goes into
    // the inventory at the cost of a larger (about 30%) .WVE file. This is accepted for M1;
    // a refactoring of inventory creation is expected once more candidates are pruned in M2.
    int firstValidIndex = sentence.Candidates.Count;
    for (int candIdx = 0; candIdx < sentence.Candidates.Count; candIdx++)
    {
        UnitCandidate candidate = sentence.Candidates[candIdx];
        int sampleOffset =
            (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);
        int sampleLength =
            (int)(((candidate.EndTimeInSecond - candidate.StartTimeInSecond) * waveFile.Format.SamplesPerSecond) + 0.5f);

        if (candidate.Id == UnitCandidate.InvalidId)
        {
            // Candidate without a valid id: its samples are stored, but it is not indexed.
            WriteIntoInventory(ConvertsWaveDataFormat(waveFile, sampleOffset, sampleLength));
            continue;
        }

        if (sampleLength > ushort.MaxValue)
        {
            throw new InvalidDataException(Helper.NeutralFormat(
                "The wave sample length of {0}-th candidate in file {1}.wav overflows.",
                candIdx, sentence.Id));
        }

        WaveCandidateInfo info = new WaveCandidateInfo
        {
            Name = candidate.Name,
            Id = candidate.Id,
            GlobalId = candidate.GlobalId,
            SentenceId = candidate.Sentence.Id,
            IndexOfNonSilence = (ushort)candidate.IndexOfNonSilence,
            FrameIndexInSentence = (ushort)candidate.StartFrame,
            FrameNumber = (ushort)(candidate.EndFrame - candidate.StartFrame),
            FrameIndex = (uint)(sentence.GlobalFrameIndex + candidate.StartFrame),
        };

        if (firstValidIndex > candIdx && _indexingFile.SamplePerFrame == 0)
        {
            // First indexed candidate while the frame size is still unset: derive it here.
            firstValidIndex = candIdx;
            if (info.FrameNumber != 0)
            {
                _indexingFile.SamplePerFrame = (uint)(sampleLength / info.FrameNumber);
            }
        }
        else if (info.FrameNumber != 0)
        {
            // Every other candidate is expected to yield the same frame size.
            Debug.Assert(_indexingFile.SamplePerFrame == (uint)(sampleLength / info.FrameNumber));
        }

        // Left/right extensible margins reach at most one neighbouring unit.
        int leftNeighbourIdx = Math.Max(0, candIdx - 1);
        int rightNeighbourIdx = Math.Min(candIdx + 1, sentence.Candidates.Count - 1);
        int leftMarginFrame =
            candidate.StartFrame - sentence.Candidates[leftNeighbourIdx].StartFrame;
        int rightMarginFrame =
            sentence.Candidates[rightNeighbourIdx].EndFrame - candidate.EndFrame;
        Debug.Assert(leftMarginFrame >= 0 && rightMarginFrame >= 0);
        info.LeftMarginInFrame = (byte)Math.Min(leftMarginFrame, MaxMarginInFrame);
        info.RightMarginInFrame = (byte)Math.Min(rightMarginFrame, MaxMarginInFrame);

        // Expected byte position of the candidate:
        // frame -> millisecond -> sample (samples per millisecond) -> byte.
        long expectedPosition = info.FrameIndex *
            _millisecondPerFrame *
            (waveFile.Format.SamplesPerSecond / 1000) *
            _header.BytesPerSample;
        long actualPosition = _writer.BaseStream.Position - _dataOffset;
        if (expectedPosition != actualPosition)
        {
            // The unit index alignment disagrees with what has been written so far.
            throw new InvalidDataException(Helper.NeutralFormat(
                "Frame {0} in sentence {1} starts at {2}, which is inconsistent with position in wave inventory {3}.\r\nPossible cause: bad MLF alignment.",
                info.FrameIndexInSentence, info.SentenceId, info.FrameIndex, actualPosition));
        }

        WriteIntoInventory(ConvertsWaveDataFormat(waveFile, sampleOffset, sampleLength));
        _indexingFile.Add(info);
    }
}
/// <summary>
/// Extracts features for every sentence listed in the file list map.
/// </summary>
/// <remarks>
/// For each sentence id the segmentation file and the waveform file are loaded, the end
/// time of the last segment is set to the waveform duration, and the sentence is extracted.
/// A sentence that fails with <see cref="InvalidDataException"/> is logged, removed from
/// <paramref name="script"/> and, after the loop, from <paramref name="fileListMap"/>.
/// Any other exception propagates to the caller.
/// </remarks>
/// <param name="script">
/// The xml script file.
/// </param>
/// <param name="fileListMap">
/// The file list map; its keys are the sentence ids to process.
/// </param>
/// <param name="alignmentDir">
/// The alignment directory holding the segmentation ("txt") files.
/// </param>
/// <param name="waveDir">
/// The wave directory.
/// </param>
/// <returns>
/// The extracted features in training sentence set.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Any of the arguments is null.
/// </exception>
public TrainingSentenceSet Extract(XmlScriptFile script, FileListMap fileListMap,
    string alignmentDir, string waveDir)
{
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (fileListMap == null)
    {
        throw new ArgumentNullException("fileListMap");
    }

    if (alignmentDir == null)
    {
        throw new ArgumentNullException("alignmentDir");
    }

    if (waveDir == null)
    {
        throw new ArgumentNullException("waveDir");
    }

    TrainingSentenceSet sentenceSet = new TrainingSentenceSet { FileListMap = fileListMap };

    // Failed ids are collected and removed after the loop, so the map is not
    // modified while its keys are being enumerated.
    List<string> errList = new List<string>();
    foreach (string sid in fileListMap.Map.Keys)
    {
        ScriptItem item = script.ItemDic[sid];
        try
        {
            // Loads the segmentation file.
            SegmentFile segmentFile = new SegmentFile();
            segmentFile.Load(fileListMap.BuildPath(alignmentDir, sid, "txt"));

            // Loads the waveform file to set the end time of the last segmentation.
            WaveFile waveFile = new WaveFile();
            waveFile.Load(fileListMap.BuildPath(waveDir, sid, FileExtensions.Waveform));
            segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].EndTime = waveFile.Duration;

            // Extracts the single script item.
            Sentence sentence = Extract(item, segmentFile);
            sentence.TrainingSet = sentenceSet;
            sentenceSet.Sentences.Add(sid, sentence);
        }
        catch (InvalidDataException e)
        {
            // Only invalid data is treated as a recoverable, per-sentence failure;
            // every other exception type is not caught and reaches the caller.
            Logger.Log(Helper.BuildExceptionMessage(e));
            script.Remove(sid);
            errList.Add(sid);
        }
    }

    fileListMap.RemoveItems(errList);

    return sentenceSet;
}
/// <summary>
/// Extracts per-frame features from a waveform file: zero crossing count, log energy and
/// lag-one normalized autocorrelation. One line "zeroCrossing energy autoCorrelation" is
/// written per frame.
/// </summary>
/// <remarks>
/// Frame <c>i</c> is centred on sample <c>(i + 1) * frameShift</c> and spans half the frame
/// length on each side. Frames whose window starts before the first sample are written
/// with all three features equal to zero. The autocorrelation stays zero for a window
/// starting at sample 0, because it needs the sample preceding the window.
/// </remarks>
/// <param name="args">
/// Arguments: [0] input wave file, [1] output fea file, [2] frame shift in samples,
/// [3] frame length in samples.
/// </param>
/// <param name="logWriter">LogWriter to implement parallel computing interface (not used here).</param>
/// <exception cref="ArgumentException">
/// Fewer than four arguments are given, or the frame shift or frame length is not positive.
/// </exception>
public static void ExtractRelatedFeaturesOneFile(string[] args, TextWriter logWriter)
{
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // All four arguments are read below, so all four are required.
    if (args.Length < 4)
    {
        throw new ArgumentException(
            "Arguments for ExtractRelatedFeaturesOneFile: input wave file, output fea file, frame shift, frame length");
    }

    string wavePath = args[0];
    string feaFile = args[1];
    int frameShift = int.Parse(args[2]);
    int framelength = int.Parse(args[3]);

    // A non-positive frame shift would never reach the end of the waveform and the
    // loop below would write output forever; reject it before touching the output file.
    if (frameShift <= 0 || framelength <= 0)
    {
        throw new ArgumentException(
            "Frame shift and frame length for ExtractRelatedFeaturesOneFile should be positive.");
    }

    int halfFrame = framelength / 2;

    using (StreamWriter sw = new StreamWriter(feaFile, false))
    {
        WaveFile waveFile = new WaveFile();
        waveFile.Load(wavePath);
        short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

        for (int i = 0; ; ++i)
        {
            int pos = (i + 1) * frameShift;
            if (pos + halfFrame > waveData.Length)
            {
                // The window would run past the end of the waveform.
                break;
            }

            int nzero = 0;
            double energy = 0;
            double autoCorr = 0;

            int nbegin = pos - halfFrame;
            int nend = pos + halfFrame;
            if (nend <= waveData.Length && nbegin >= 0)
            {
                // The autocorrelation reads waveData[j - 1], which only exists
                // when the window does not start at the very first sample.
                bool hasPreviousSample = nbegin != 0;
                double dsum = 0;
                double product1 = 0;
                double product2 = 0;

                int j = nbegin;
                for (; j < nend - 1; ++j)
                {
                    // Sign change between neighbouring samples, or leaving an exact zero.
                    if ((waveData[j] < 0 && waveData[j + 1] > 0) ||
                        (waveData[j] > 0 && waveData[j + 1] < 0) ||
                        (waveData[j] == 0 && waveData[j + 1] != 0))
                    {
                        nzero++;
                    }

                    energy += waveData[j] * waveData[j];

                    if (hasPreviousSample)
                    {
                        dsum += waveData[j] * waveData[j - 1];
                        product1 += waveData[j] * waveData[j];
                        product2 += waveData[j - 1] * waveData[j - 1];
                    }
                }

                // The last sample of the window has no right neighbour for the zero
                // crossing test, but still contributes to energy and autocorrelation.
                energy += waveData[j] * waveData[j];
                energy = energy / framelength;
                energy = 10 * Math.Log(Minimum + energy);

                if (hasPreviousSample)
                {
                    dsum += waveData[j] * waveData[j - 1];
                    product1 += waveData[j] * waveData[j];
                    product2 += waveData[j - 1] * waveData[j - 1];
                    autoCorr = dsum / Math.Sqrt(product1 * product2);
                }
            }

            sw.WriteLine("{0} {1:F6} {2:F6}", nzero, energy, autoCorr);
        }
    }
}
/// <summary>
/// Resamples a waveform instance in place to another sampling rate.
/// </summary>
/// <remarks>
/// Nothing is changed when the waveform already has the target sampling rate.
/// Otherwise the data chunk is replaced by the resampled samples, and the sampling rate
/// and average byte rate of the format are updated.
/// </remarks>
/// <param name="waveFile">Waveform instance to resample.</param>
/// <param name="targetSamplesPerSecond">Samples per second of the target waveform.</param>
/// <exception cref="ArgumentNullException">
/// The waveform, its Riff, or its 16-bit data is null.
/// </exception>
/// <exception cref="NotSupportedException">
/// The bits per sample or channel count of the waveform is not supported.
/// </exception>
/// <exception cref="InvalidDataException">
/// The cached 16-bit data does not match the size of the sound data.
/// </exception>
public static void Resample(WaveFile waveFile, int targetSamplesPerSecond)
{
    if (waveFile == null)
    {
        throw new ArgumentNullException("waveFile");
    }

    if (waveFile.Riff == null)
    {
        throw new ArgumentNullException(
            "waveFile",
            Helper.NeutralFormat("The Riff of wave file should not bu null."));
    }

    if (waveFile.DataIn16Bits == null)
    {
        throw new ArgumentNullException(
            "waveFile",
            Helper.NeutralFormat("The DataIn16Bits of wave file should not bu null."));
    }

    if (waveFile.Format.BitsPerSample != SupportedBitsPerSample)
    {
        throw new NotSupportedException(string.Format(
            CultureInfo.InvariantCulture,
            "Only {0}bit waveform file supported for resampling.",
            SupportedBitsPerSample));
    }

    if (waveFile.Format.Channels != SupportedChannels)
    {
        throw new NotSupportedException(string.Format(
            CultureInfo.InvariantCulture,
            "Only {0} channel waveform file supported for resampling.",
            SupportedChannels));
    }

    if (waveFile.Format.SamplesPerSecond == targetSamplesPerSecond)
    {
        // Already at the requested rate.
        return;
    }

    // The 16-bit cache must cover exactly the sound data it was derived from.
    if (waveFile.DataIn16Bits.Length != waveFile.GetSoundData().Length / sizeof(short))
    {
        string staleCacheMessage = string.Format(
            CultureInfo.InvariantCulture,
            "The Data in 16 bits buffer is not updated with the sound data.");
        Debug.Assert(false, staleCacheMessage);
        throw new InvalidDataException(staleCacheMessage);
    }

    ResampleFilter filter =
        new ResampleFilter(waveFile.Format.SamplesPerSecond, targetSamplesPerSecond);
    short[] resampled = filter.Resample(waveFile.DataIn16Bits);

    // Store the resampled sound data back into the instance.
    RiffChunk soundChunk = waveFile.Riff.GetChunk(Riff.IdData);
    soundChunk.SetData(ArrayHelper.BinaryConvertArray(resampled));

    // Update the rate-dependent header fields.
    WaveFormat updatedFormat = waveFile.Format;
    updatedFormat.SamplesPerSecond = targetSamplesPerSecond;
    updatedFormat.AverageBytesPerSecond =
        updatedFormat.SamplesPerSecond * waveFile.Format.BitsPerSample / 8;
    waveFile.Format = updatedFormat;
}
/// <summary>
/// Resamples a source waveform file to the given sampling rate and saves the result
/// as the target waveform file. Only downsampling (or keeping the rate) is supported.
/// </summary>
/// <param name="sourceFile">Location of source waveform file.</param>
/// <param name="targetFile">Location of target waveform file.</param>
/// <param name="targetSamplesPerSecond">Samples per second of the target waveform file.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sourceFile"/> or <paramref name="targetFile"/> is null or empty.
/// </exception>
/// <exception cref="NotSupportedException">
/// The source sampling rate is lower than the target one, i.e. upsampling is requested.
/// </exception>
public static void Resample(string sourceFile, string targetFile, int targetSamplesPerSecond)
{
    if (string.IsNullOrEmpty(sourceFile))
    {
        throw new ArgumentNullException("sourceFile");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    // Only the header is read here, so an unsupported request is rejected
    // before the whole file is loaded.
    WaveFormat format = WaveFile.ReadFormat(sourceFile);
    if (format.SamplesPerSecond < targetSamplesPerSecond)
    {
        // The second placeholder must be "{1}": the previous "[[1}" contained a stray
        // closing brace, which is a malformed composite format string.
        throw new NotSupportedException(Helper.NeutralFormat(
            "Resampling tool will introduce obvious aliasing " +
            "noise when upsampling from [{0}] to [{1}], refer to bug #12628",
            format.SamplesPerSecond, targetSamplesPerSecond));
    }

    WaveFile waveFile = new WaveFile();
    waveFile.Load(sourceFile);

    Resample(waveFile, targetSamplesPerSecond);

    Helper.EnsureFolderExistForFile(targetFile);
    waveFile.Save(targetFile);
}
/// <summary>
/// Appends the sound data of another waveform to the end of this instance.
/// </summary>
/// <remarks>
/// When this instance has no Riff yet, it is initialized and adopts the format of
/// <paramref name="wf"/>. Otherwise both formats must be equal.
/// </remarks>
/// <param name="wf">Waveform whose sound data is appended.</param>
/// <exception cref="ArgumentNullException"><paramref name="wf"/> is null.</exception>
/// <exception cref="ArgumentException">The formats of both waveforms differ.</exception>
public void Append(WaveFile wf)
{
    if (wf == null)
    {
        throw new ArgumentNullException("wf");
    }

    bool isEmptyInstance = _riff == null;
    if (isEmptyInstance)
    {
        // First append into an empty instance: take over the incoming format.
        Initialze();
        Format = wf.Format;
    }

    if (!Format.Equals(wf.Format))
    {
        throw new ArgumentException(
            string.Format(
                CultureInfo.InvariantCulture,
                "Current format should not be different with the waveform file to append."),
            "wf");
    }

    RiffChunk soundChunk = _riff.GetChunk(Riff.IdData);
    if (soundChunk == null)
    {
        // No data chunk yet: create one and register it with the Riff.
        soundChunk = new RiffChunk();
        soundChunk.Id = Riff.IdData;
        _riff.Chunks.Add(soundChunk);
    }

    soundChunk.Append(wf.GetSoundData());
}
/// <summary>
/// Cuts a piece out of this waveform and returns it as a new waveform instance.
/// </summary>
/// <param name="startTime">Start time of the piece, in seconds; must not be negative.</param>
/// <param name="duration">Duration of the piece, in seconds; must be greater than zero.</param>
/// <returns>A new waveform whose Riff is the result of the cut.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="startTime"/> is negative or <paramref name="duration"/> is not positive.
/// </exception>
public WaveFile Cut(double startTime, double duration)
{
    if (startTime < 0.0f)
    {
        throw new ArgumentException(string.Format(
            CultureInfo.InvariantCulture,
            "The start time [{0}] of location in waveform should not be negative.",
            startTime));
    }

    if (duration <= 0.0f)
    {
        throw new ArgumentException(string.Format(
            CultureInfo.InvariantCulture,
            "The duration time [{0}] of location in waveform should be greater than zero.",
            duration));
    }

    WaveFile piece = new WaveFile();
    piece.Riff = DoCut(startTime, duration);
    return piece;
}
/// <summary>
/// Merges two single channel waveform files into one 2-channel waveform file by
/// interleaving their sample blocks (left block first, then right block).
/// </summary>
/// <param name="leftFile">Left waveform file for left channel, i.e. first channel.</param>
/// <param name="rightFile">Right waveform file for right channel, i.e. second channel.</param>
/// <returns>Merged waveform file.</returns>
/// <exception cref="ArgumentNullException">One of the waveform files is null.</exception>
/// <exception cref="InvalidDataException">
/// The formats or data lengths differ, the inputs are not single channel, or the block
/// alignment of the format is not positive.
/// </exception>
public static WaveFile MergeTwoChannels(WaveFile leftFile, WaveFile rightFile)
{
    if (leftFile == null)
    {
        throw new ArgumentNullException("leftFile");
    }

    if (rightFile == null)
    {
        throw new ArgumentNullException("rightFile");
    }

    if (leftFile.Format != rightFile.Format)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Both waveform files should share the same formant.");
        throw new InvalidDataException(message);
    }

    // Fetch each sound buffer once instead of on every loop iteration.
    byte[] leftData = leftFile.GetSoundData();
    byte[] rightData = rightFile.GetSoundData();

    if (leftData.Length != rightData.Length)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Both waveform files should have the same samples.");
        throw new InvalidDataException(message);
    }

    if (leftFile.Format.Channels != 1)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Only single channel waveform file is supported to merge.");
        throw new InvalidDataException(message);
    }

    // Size in bytes of one single-channel block; the interleave loop advances by it.
    int sourceBlockAlign = leftFile.Format.BlockAlign;
    if (sourceBlockAlign <= 0)
    {
        // A zero or negative step would never finish the loop below.
        string message = string.Format(CultureInfo.InvariantCulture,
            "The block alignment [{0}] of waveform file to merge should be positive.",
            sourceBlockAlign);
        throw new InvalidDataException(message);
    }

    WaveFile targetFile = new WaveFile();
    WaveFormat format = leftFile.Format;
    format.Channels = 2;
    format.AverageBytesPerSecond *= format.Channels;
    format.BlockAlign *= format.Channels;
    targetFile.Format = format;

    int channelCount = format.Channels;
    byte[] data = new byte[leftData.Length * channelCount];
    for (int i = 0; i < leftData.Length; i += sourceBlockAlign)
    {
        // Each target block holds the left block followed by the right block.
        Buffer.BlockCopy(leftData, i, data, i * channelCount, sourceBlockAlign);
        Buffer.BlockCopy(rightData, i, data, (i * channelCount) + sourceBlockAlign,
            sourceBlockAlign);
    }

    RiffChunk chunk = targetFile.Riff.GetChunk(Riff.IdData);
    chunk.SetData(data);

    return targetFile;
}
/// <summary>
/// Splits a 2-channel waveform into two single channel waveforms.
/// </summary>
/// <param name="waveFile">Source 2-channel waveform file instance.</param>
/// <returns>
/// Two waveform files, one per channel, in the order returned by the channel split.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="waveFile"/> is null.</exception>
/// <exception cref="NotSupportedException">The waveform does not have two channels.</exception>
/// <exception cref="InvalidDataException">
/// Splitting the sound data did not yield exactly two channels.
/// </exception>
public static WaveFile[] SplitIntoTwoChannels(WaveFile waveFile)
{
    if (waveFile == null)
    {
        throw new ArgumentNullException("waveFile");
    }

    if (waveFile.Format.Channels != 2)
    {
        throw new NotSupportedException(string.Format(
            CultureInfo.InvariantCulture,
            "Only support split two channels waveform files."));
    }

    // Single channel format: block size and byte rate shrink by the source channel count.
    WaveFormat monoFormat = waveFile.Format;
    monoFormat.Channels = 1;
    monoFormat.BlockAlign /= waveFile.Format.Channels;
    monoFormat.AverageBytesPerSecond /= waveFile.Format.Channels;

    byte[][] channelData = waveFile.SplitChannels();
    if (channelData.Length != 2)
    {
        throw new InvalidDataException(string.Format(
            CultureInfo.InvariantCulture,
            "Invalid channel number [{0}] found for splitting [{1}], which should equal to 2.",
            channelData.Length,
            waveFile.FilePath));
    }

    WaveFile[] result = new WaveFile[waveFile.Format.Channels];
    for (int channel = 0; channel < channelData.Length; channel++)
    {
        result[channel] = new WaveFile();
        result[channel].Format = monoFormat;

        RiffChunk soundChunk = result[channel].Riff.GetChunk(Riff.IdData);
        soundChunk.SetData(channelData[channel]);
    }

    return result;
}
/// <summary>
/// Builds an in-memory waveform of silence: single channel, 16-bit PCM at 16000 Hz,
/// with zero-filled sound data.
/// </summary>
/// <param name="silenceDuration">
/// Silence duration; multiplied by the byte rate to size the data
/// (presumably seconds - confirm with callers).
/// </param>
/// <returns>The silence waveform instance.</returns>
private static WaveFile BuildSilenceWaveFile(float silenceDuration)
{
    WaveFormat silenceFormat = new WaveFormat();
    silenceFormat.Channels = 1;
    silenceFormat.BlockAlign = 2;
    silenceFormat.BitsPerSample = 16;
    silenceFormat.ExtSize = 0;
    silenceFormat.FormatTag = WaveFormatTag.Pcm;
    silenceFormat.SamplesPerSecond = 16000;
    silenceFormat.AverageBytesPerSecond = 32000;

    WaveFile silence = new WaveFile();
    silence.Format = silenceFormat;

    // Size of the sound data in bytes, made even so it holds whole 2-byte samples.
    int byteCount = (int)(silenceDuration * silenceFormat.AverageBytesPerSecond);
    byteCount -= byteCount % 2;

    RiffChunk soundChunk = silence.Riff.GetChunk(Riff.IdData);
    soundChunk.SetData(new byte[byteCount]);
    soundChunk.Size = soundChunk.GetData().Length;

    return silence;
}
/// <summary>
/// Loads head and tail margins for the candidates of every sentence, reading each
/// sentence's waveform file from the given directory.
/// </summary>
/// <param name="waveDir">Wave directory.</param>
/// <param name="marginLength">Cross correlation margin length in millisecond.</param>
/// <exception cref="FileNotFoundException">
/// The waveform file of a sentence does not exist.
/// </exception>
public void LoadMargin(string waveDir, int marginLength)
{
    foreach (string sentenceId in _idKeyedSentences.Keys)
    {
        string wavePath =
            FileListMap.BuildPath(FileListMap, waveDir, sentenceId, FileExtensions.Waveform);

        bool waveExists = File.Exists(wavePath);
        if (!waveExists)
        {
            throw new FileNotFoundException(
                Helper.NeutralFormat("Wave file is not found \"{0}\".", wavePath));
        }

        WaveFile sentenceWave = new WaveFile();
        sentenceWave.Load(wavePath);

        _idKeyedSentences[sentenceId].LoadMargin(sentenceWave, marginLength);
    }
}
/// <summary>
/// Extracts the LPC residual error of each frame of a waveform file.
/// </summary>
/// <remarks>
/// One line of the lpc file is read per frame. Frame <c>i</c> is centred on sample
/// <c>(i + 1) * frameShift</c> and spans half the frame length on each side. For every
/// frame lying fully inside the waveform, a line "firstLpcValue residualError" is
/// written, where the residual error is the log energy minus the log of the predicted
/// value. Frames that do not fit inside the waveform produce no output line.
/// </remarks>
/// <param name="args">
/// Arguments: [0] input wave file, [1] input lpc file, [2] output lpc error file,
/// [3] frame shift in samples, [4] frame length in samples.
/// </param>
/// <param name="logWriter">LogWriter to implement parallel computing interface (not used here).</param>
/// <exception cref="ArgumentException">Fewer than five arguments are given.</exception>
public static void ExtractLpcResidualErrorOneFile(string[] args, TextWriter logWriter)
{
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // All five arguments are read below, so all five are required.
    if (args.Length < 5)
    {
        throw new ArgumentException(
            "Arguments for ExtractLpcResidualErrorOneFile: input wave file, input lpc file, output lpc error file, frame shift, frame length");
    }

    string wavePath = args[0];
    string lpcFile = args[1];
    string errorFile = args[2];
    int frameShift = int.Parse(args[3], CultureInfo.InvariantCulture);
    int frameLength = int.Parse(args[4], CultureInfo.InvariantCulture);

    // Read the lpc values: one frame per line, values separated by spaces.
    List<double[]> lpcData = new List<double[]>();
    foreach (string line in Helper.FileLines(lpcFile))
    {
        string[] fields = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        double[] data = fields.Select(i => double.Parse(i, CultureInfo.InvariantCulture)).ToArray();
        lpcData.Add(data);
    }

    using (StreamWriter sw = new StreamWriter(errorFile, false))
    {
        WaveFile waveFile = new WaveFile();
        waveFile.Load(wavePath);
        short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

        for (int i = 0; i < lpcData.Count; i++)
        {
            int pos = (i + 1) * frameShift;
            int nbegin = pos - (frameLength / 2);
            int nend = pos + (frameLength / 2);

            if (nend > waveData.Length || nbegin < 0)
            {
                // The frame does not fit inside the waveform; nothing is written for it.
                continue;
            }

            // Actual value: mean energy of the frame, kept in linear and log form.
            double energy = 0;
            for (int j = nbegin; j < nend; j++)
            {
                energy += waveData[j] * waveData[j];
            }

            energy = energy / (double)frameLength;
            double linearEnergy = energy;
            energy = 10 * Math.Log(Minimum + energy);

            // Prediction value: lpc-weighted sum of the mean lagged products.
            double prediction = 0;
            for (int k = 0; k < LpcOrder; k++)
            {
                double laggedSum = 0;
                for (int j = nbegin; j < nend; j++)
                {
                    // NOTE(review): "j - k > 0" also skips index 0 (j == k); confirm
                    // whether ">= 0" was intended before changing it.
                    if (j - k > 0)
                    {
                        laggedSum += waveData[j] * waveData[j - k];
                    }
                }

                prediction += lpcData[i][k] * (laggedSum / (double)frameLength);
            }

            prediction = prediction + linearEnergy;
            prediction = 10 * Math.Log(Math.Abs(prediction) + Minimum);

            // Output: first lpc value of the frame and the residual error.
            sw.WriteLine("{0:F6} {1:F6}", lpcData[i][0], energy - prediction);
        }
    }
}
/// <summary>
/// Loads head and tail margins for every candidate of this sentence by delegating
/// to each candidate in turn.
/// </summary>
/// <param name="wave">WaveFile from which to load wave data.</param>
/// <param name="marginLength">Cross correlation margin length in millisecond.</param>
public void LoadMargin(WaveFile wave, int marginLength)
{
    foreach (UnitCandidate unit in Candidates)
    {
        unit.LoadMargin(wave, marginLength);
    }
}
/// <summary>
/// Detaches the waveform, resets the waveform view and the horizontal scale bar,
/// then requests a repaint.
/// </summary>
public void Reset()
{
    // Detach the waveform before resetting the child parts.
    _waveFile = null;

    // Child parts are reset in the same order as before: view first, then scale bar.
    _waveformView.Reset();
    _horScaleBar.Reset();

    Invalidate();
}
/// <summary>
/// Creates a visual waveform from a waveform stream: the format is copied, every
/// 16-bit sample is added as a double, and the Y axis is reset from the samples.
/// </summary>
/// <param name="waveStream">Stream holding the waveform to load.</param>
/// <returns>The populated visual waveform.</returns>
public static VisualWaveForm CreateFromStream(Stream waveStream)
{
    VisualWaveForm visual = new VisualWaveForm();

    WaveFile loaded = new WaveFile();
    loaded.Load(waveStream);
    visual.Format = loaded.Format;

    TransactionObservableCollection<double> target = visual.WaveSamples;
    short[] pcmSamples = loaded.DataIn16Bits;
    for (int index = 0; index < pcmSamples.Length; index++)
    {
        // Each 16-bit sample is widened to double.
        double value = pcmSamples[index];
        target.Add(value);
    }

    visual.YAxis.Reset(target, 0);
    return visual;
}
/// <summary>
/// Loads the waveform file of a sentence and adds the sentence into the wave inventory.
/// </summary>
/// <param name="sentence">The given sentence.</param>
/// <param name="waveFileName">The corresponding wave form file name.</param>
/// <exception cref="NotSupportedException">
/// The waveform is not single channel PCM at the sampling rate of the inventory header.
/// </exception>
/// <exception cref="InvalidDataException">
/// Adding the sentence failed on invalid data; the original exception is kept as the
/// inner exception and the message names the file.
/// </exception>
public void Add(Sentence sentence, string waveFileName)
{
    WaveFile sentenceWave = new WaveFile();
    sentenceWave.Load(waveFileName);

    bool isSupportedFormat =
        sentenceWave.Format.SamplesPerSecond == _header.SamplesPerSecond &&
        sentenceWave.Format.Channels == 1 &&
        sentenceWave.Format.FormatTag == WaveFormatTag.Pcm;
    if (!isSupportedFormat)
    {
        throw new NotSupportedException(Helper.NeutralFormat(
            "The waveform format of file [{0}] is not supported.", waveFileName));
    }

    try
    {
        Add(sentence, sentenceWave);
    }
    catch (InvalidDataException inner)
    {
        // Re-thrown with the file name, so the failing input can be identified.
        string failure = Helper.NeutralFormat("It fails to process the file [{0}].", waveFileName);
        throw new InvalidDataException(failure, inner);
    }
}
/// <summary>
/// Loads the head and tail wave margins of this candidate. Nothing is done for a
/// candidate whose id is invalid.
/// </summary>
/// <remarks>
/// The head margin covers one margin length on each side of the start time. The tail
/// margin covers half a margin length on each side of the end time.
/// </remarks>
/// <param name="wave">WaveFile from which to load the data.</param>
/// <param name="marginLength">Cross correlation margin length in millisecond.</param>
/// <exception cref="InvalidDataException">
/// The waveform is too short for the margins, or the cut margins do not have the
/// expected number of samples.
/// </exception>
public void LoadMargin(WaveFile wave, int marginLength)
{
    if (Id == InvalidId)
    {
        return;
    }

    // Milliseconds to seconds.
    double durationInSecond = marginLength * 0.001;

    bool headOutOfRange =
        StartTimeInSecond - durationInSecond < 0 ||
        wave.Duration < StartTimeInSecond + durationInSecond;
    bool outOfRange =
        headOutOfRange ||
        EndTimeInSecond - (durationInSecond / 2) < 0 ||
        wave.Duration < EndTimeInSecond + (durationInSecond / 2);
    if (outOfRange)
    {
        throw new InvalidDataException("Wave duration is shorter than expected duration of margin");
    }

    WaveFile headPiece = wave.Cut(StartTimeInSecond - durationInSecond, durationInSecond * 2);
    HeadMargin = headPiece.DataIn16Bits;

    WaveFile tailPiece = wave.Cut(EndTimeInSecond - (durationInSecond / 2), durationInSecond);
    TailMargin = tailPiece.DataIn16Bits;

    // The head margin must have the expected sample count, twice that of the tail margin.
    int expectedHeadSamples = (int)(durationInSecond * 2 * wave.Format.SamplesPerSecond);
    if (expectedHeadSamples != HeadMargin.Length || expectedHeadSamples != TailMargin.Length * 2)
    {
        throw new InvalidDataException("Margin data is not correctly generated");
    }
}
/// <summary>
/// Writes the left margin of a candidate into the inventory, when margins are enabled.
/// </summary>
/// <remarks>
/// The first candidate of a sentence gets a zero-filled margin. Otherwise a margin is
/// written only when the previous candidate is not in the inventory: the samples
/// preceding the current candidate are used, with leading zeros when the margin would
/// start before the beginning of the waveform.
/// </remarks>
/// <param name="waveFile">Waveform file the candidate belongs to.</param>
/// <param name="candidate">Candidate whose left margin is written.</param>
/// <param name="candidateInfo">Candidate information of the candidate (not used here).</param>
private void WriteLeftMargin(WaveFile waveFile, UnitCandidate candidate,
    WaveCandidateInfo candidateInfo)
{
    if (_ccMarginLength + _fsMarginLength <= 0)
    {
        // No margin configured.
        return;
    }

    int marginLength = _ccMarginLength + _fsMarginLength;
    int candidateOffset =
        (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);

    if (candidate.Index == 0)
    {
        // First candidate of the sentence: nothing precedes it, so pad with zeros.
        WriteZeroMargin(marginLength);
        return;
    }

    if (candidate.Sentence.Candidates[candidate.Index - 1].Id != UnitCandidate.InvalidId)
    {
        // The previous candidate is in the inventory; no margin is written in this case.
        return;
    }

    // The previous candidate is not in the inventory, so its samples serve as margin.
    int marginOffset = (int)(candidateOffset - marginLength);
    int sampleCount = marginLength;
    if (marginOffset < 0)
    {
        // The margin starts before the waveform: zero-fill the missing part
        // and read the remainder from the very beginning.
        WriteZeroMargin(-marginOffset);
        sampleCount += marginOffset;
        marginOffset = 0;
    }

    WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginOffset, sampleCount));
}
/// <summary>
/// Detaches the waveform, clears the time marks, restores the default zoom, position
/// and view window, then requests a repaint.
/// </summary>
public void Reset()
{
    // Detach the waveform and drop its time marks.
    _waveFile = null;
    _timeMarks.Clear();

    // Default horizontal zoom and position.
    _positionRatioX = 0;
    _zoomX = 1.0f;

    // Return the view window to its initial values.
    _viewSampleLength = 0;
    _viewSampleOffset = 0;

    Invalidate();
}
/// <summary>
/// Converts a range of samples of a waveform into the sample format of the voice font
/// described by the inventory header.
/// </summary>
/// <remarks>
/// For a PCM target the bytes are copied when the sample sizes match, and 16-bit samples
/// are reduced to 8-bit when the target uses one byte per sample. For a Mulaw target
/// 16-bit samples are encoded to Mulaw. A PCM target matching none of the handled sample
/// sizes yields a zero-filled buffer.
/// </remarks>
/// <param name="waveFile">The given WaveFile object in which the data will be converted.</param>
/// <param name="offset">The offset in sample count of the data will be converted.</param>
/// <param name="count">The count in sample count of the data will be converted.</param>
/// <returns>Waveform data in supported format of voice font.</returns>
/// <exception cref="NotSupportedException">
/// The target is 2-byte PCM with a different source sample size, or the target format
/// is neither PCM nor Mulaw.
/// </exception>
/// <exception cref="InvalidDataException">
/// For a Mulaw target: the source is not 8000 Hz, 16 bits per sample with 2 bytes alignment.
/// </exception>
private byte[] ConvertsWaveDataFormat(WaveFile waveFile, int offset, int count)
{
    Debug.Assert(waveFile.Format.FormatTag == WaveFormatTag.Pcm,
        "The source format tag should be PCM.");

    byte[] converted = new byte[count * _header.BytesPerSample];

    if (_header.FormatCategory == WaveFormatTag.Pcm)
    {
        if (waveFile.Format.BitsPerSample == _header.BytesPerSample * 8)
        {
            // Same sample size: plain byte copy, with sample units scaled to bytes.
            offset *= _header.BytesPerSample;
            count *= _header.BytesPerSample;
            Array.Copy(waveFile.GetSoundData(), offset, converted, 0, count);
            return converted;
        }

        if (_header.BytesPerSample == 1)
        {
            // Convert 16-bit to 8-bit.
            short[] source16 = waveFile.DataIn16Bits;
            for (int i = 0; i < converted.Length; ++i)
            {
                converted[i] = (byte)((source16[i + offset] / 256) + 128);
            }

            return converted;
        }

        if (_header.BytesPerSample == 2)
        {
            throw new NotSupportedException("It is unsupported to convert 8-bit to 16-bit");
        }

        // Any other sample size falls through and returns the zero-filled buffer.
        return converted;
    }

    if (_header.FormatCategory != WaveFormatTag.Mulaw)
    {
        // Bug #70735 is filed to track: Currently, Compress is not supported in RUS offline inventory building.
        throw new NotSupportedException(
            Helper.NeutralFormat("Unsupported target format [{0}].", _header.FormatCategory));
    }

    Debug.Assert(_header.SamplesPerSecond == 8000, "Only supports 8k Hz for mulaw voice.");
    Debug.Assert(_header.BytesPerSample == 1, "Only supports 1 byte per sample for mulaw voice.");
    Debug.Assert(_header.Compression == WaveCompressCatalog.Unc,
        "Only supports uncompress encoding for mulaw voice.");

    if (waveFile.Format.SamplesPerSecond != 8000)
    {
        throw new InvalidDataException(Helper.NeutralFormat(
            "Samples per second [{0}] of source waveform file should be the same with that [{1}] of target voice.",
            waveFile.Format.SamplesPerSecond, _header.SamplesPerSecond));
    }

    if (waveFile.Format.BitsPerSample != 16 || waveFile.Format.BlockAlign != 2)
    {
        throw new InvalidDataException(Helper.NeutralFormat(
            "Only supports 16 bits per sample and 2 bytes alignment, while that of source waveform file is [{0}] and [{1}].",
            waveFile.Format.BitsPerSample, waveFile.Format.BlockAlign));
    }

    // Converts 16bits PCM samples to 8 bits Mulaw samples.
    short[] linearSamples = waveFile.DataIn16Bits;
    for (int i = 0; i < count; i++)
    {
        converted[i] = SampleConverter.LinearToUlaw(linearSamples[offset + i]);
    }

    return converted;
}
/// <summary>
/// Creates a waveform instance and loads it from the given file.
/// </summary>
/// <param name="filePath">Waveform file to load from.</param>
/// <returns>The loaded waveform instance.</returns>
public static WaveFile ReadWaveFile(string filePath)
{
    WaveFile loaded = new WaveFile();
    loaded.Load(filePath);

    return loaded;
}