/// <summary>
/// Reads a waveform file and keeps its sample data and sampling rate on this instance.
/// </summary>
/// <param name="filePath">Waveform file path.</param>
/// <exception cref="NotSupportedException">The waveform is not 16 bits per sample.</exception>
public void Load(string filePath)
{
    _filePath = filePath;

    WaveFile source = new WaveFile();
    source.Load(filePath);

    // Anything other than 16 bits per sample is rejected before any data is stored.
    if (source.Format.BitsPerSample != (int)WaveBitsPerSample.Sixteen)
    {
        throw new NotSupportedException(string.Format(
            CultureInfo.InvariantCulture,
            "Only {0} bits per sample waveform is supported. But it is {1} bits per sample of waveform file [{2}].",
            (int)WaveBitsPerSample.Sixteen,
            source.Format.BitsPerSample,
            filePath));
    }

    _waveData = ArrayHelper.BinaryConvertArray(source.GetSoundData());
    _samplesPerSecond = source.Format.SamplesPerSecond;
}
/// <summary>
/// Loads the waveform that belongs to a sentence and adds both to the wave inventory.
/// </summary>
/// <param name="sentence">The given sentence.</param>
/// <param name="waveFileName">The corresponding wave form file name.</param>
/// <exception cref="NotSupportedException">
/// The file's sampling rate differs from the inventory header, it is not mono, or it is not PCM.
/// </exception>
/// <exception cref="InvalidDataException">
/// Adding the loaded waveform failed; the original exception is attached as the inner exception.
/// </exception>
public void Add(Sentence sentence, string waveFileName)
{
    WaveFile wave = new WaveFile();
    wave.Load(waveFileName);

    bool formatMismatch =
        wave.Format.SamplesPerSecond != _header.SamplesPerSecond ||
        wave.Format.Channels != 1 ||
        wave.Format.FormatTag != WaveFormatTag.Pcm;
    if (formatMismatch)
    {
        string unsupported = Helper.NeutralFormat(
            "The waveform format of file [{0}] is not supported.", waveFileName);
        throw new NotSupportedException(unsupported);
    }

    try
    {
        Add(sentence, wave);
    }
    catch (InvalidDataException inner)
    {
        // Re-wrap so the failing file name is part of the message.
        string failure = Helper.NeutralFormat("It fails to process the file [{0}].", waveFileName);
        throw new InvalidDataException(failure, inner);
    }
}
/// <summary>
/// Extracts features for every sentence id in the file list map.
/// Sentences that raise <see cref="InvalidDataException"/> are logged, removed from the
/// script and from the file list map, and left out of the result.
/// </summary>
/// <param name="script">
/// The xml script file.
/// </param>
/// <param name="fileListMap">
/// The file list map.
/// </param>
/// <param name="alignmentDir">
/// The alignment directory.
/// </param>
/// <param name="waveDir">
/// The wave directory.
/// </param>
/// <returns>
/// The extracted features in training sentence set.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Any of the four arguments is null.
/// </exception>
public TrainingSentenceSet Extract(XmlScriptFile script, FileListMap fileListMap, string alignmentDir, string waveDir)
{
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (fileListMap == null)
    {
        throw new ArgumentNullException("fileListMap");
    }

    if (alignmentDir == null)
    {
        throw new ArgumentNullException("alignmentDir");
    }

    if (waveDir == null)
    {
        throw new ArgumentNullException("waveDir");
    }

    TrainingSentenceSet result = new TrainingSentenceSet();
    result.FileListMap = fileListMap;

    List<string> failedIds = new List<string>();
    foreach (string sentenceId in fileListMap.Map.Keys)
    {
        ScriptItem scriptItem = script.ItemDic[sentenceId];
        try
        {
            // Segmentation (alignment) of this sentence.
            string segmentPath = fileListMap.BuildPath(alignmentDir, sentenceId, "txt");
            SegmentFile segments = new SegmentFile();
            segments.Load(segmentPath);

            // The waveform supplies the end time of the last segment.
            string wavePath = fileListMap.BuildPath(waveDir, sentenceId, FileExtensions.Waveform);
            WaveFile wave = new WaveFile();
            wave.Load(wavePath);
            int lastIndex = segments.WaveSegments.Count - 1;
            segments.WaveSegments[lastIndex].EndTime = wave.Duration;

            // Feature extraction for the single script item.
            Sentence extracted = Extract(scriptItem, segments);
            extracted.TrainingSet = result;
            result.Sentences.Add(sentenceId, extracted);
        }
        catch (InvalidDataException invalid)
        {
            // Only invalid data is tolerated; every other exception type propagates.
            Logger.Log(Helper.BuildExceptionMessage(invalid));
            script.Remove(sentenceId);
            failedIds.Add(sentenceId);
        }
    }

    // The map is pruned after the loop, once enumeration of its keys has finished.
    fileListMap.RemoveItems(failedIds);
    return result;
}
/// <summary>
/// Extract related features from wave: zero crossing, energy, autocorrelation.
/// One line "zeroCrossing energy autoCorrelation" is written to the fea file per frame.
/// Frames whose window would start before the first sample are written as all zeros.
/// </summary>
/// <param name="args">
/// Arguments: wave file, fea file, frame shift, frame length.
/// Frame shift and frame length are used as sample counts when indexing the wave data.
/// </param>
/// <param name="logWriter">LogWriter to implement parallel computing interface.</param>
/// <exception cref="ArgumentException">
/// Fewer than four arguments are given, or the frame shift or frame length is not positive.
/// </exception>
public static void ExtractRelatedFeaturesOneFile(string[] args, TextWriter logWriter)
{
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // All four entries are read below, so all four must be present.
    if (args.Length < 4)
    {
        throw new ArgumentException("Arguments for ExtractRelatedFeaturesOneFile: input wave file, output fea file, frame shift, frame length");
    }

    string wavePath = args[0];
    string feaFile = args[1];
    int frameShift = int.Parse(args[2], System.Globalization.CultureInfo.InvariantCulture);
    int framelength = int.Parse(args[3], System.Globalization.CultureInfo.InvariantCulture);

    // A non-positive shift never moves the frame position past the end of the data,
    // so the unbounded frame loop below would keep writing rows forever.
    if (frameShift <= 0 || framelength <= 0)
    {
        throw new ArgumentException("Frame shift and frame length for ExtractRelatedFeaturesOneFile must be positive");
    }

    int halfLength = framelength / 2;

    // output <zeroCrossing energy autoCorrelation>.
    using (StreamWriter sw = new StreamWriter(feaFile, false))
    {
        // load wave
        WaveFile waveFile = new WaveFile();
        waveFile.Load(wavePath);
        short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

        // calculate features, one frame per iteration, until the window runs past the data.
        for (int i = 0;; ++i)
        {
            int pos = (i + 1) * frameShift;
            if (pos + halfLength > waveData.Length)
            {
                break;
            }

            int nzero = 0;
            double energy = 0;
            double autoCorr = 0;
            double dsum = 0;
            double product1 = 0;
            double product2 = 0;

            int nbegin = pos - halfLength;
            int nend = pos + halfLength;

            if (nend <= waveData.Length && nbegin >= 0)
            {
                // The lag-1 autocorrelation reads waveData[j - 1], which does not exist
                // for a frame starting at sample 0; such a frame keeps autoCorr at 0.
                bool hasPrevious = nbegin > 0;

                // process each frame: every sample except the last one of the window.
                int j = nbegin;
                for (; j < nend - 1; ++j)
                {
                    if ((waveData[j] < 0 && waveData[j + 1] > 0) ||
                        (waveData[j] > 0 && waveData[j + 1] < 0) ||
                        (waveData[j] == 0 && waveData[j + 1] != 0))
                    {
                        nzero++;
                    }

                    energy += waveData[j] * waveData[j];

                    if (hasPrevious)
                    {
                        dsum += waveData[j] * waveData[j - 1];
                        product1 += waveData[j] * waveData[j];
                        product2 += waveData[j - 1] * waveData[j - 1];
                    }
                }

                // calculate energy; the last sample has no successor for the zero-crossing
                // test, so it is accumulated here, outside the loop.
                energy += waveData[j] * waveData[j];
                energy = energy / framelength;
                energy = 10 * Math.Log(Minimum + energy);

                if (hasPrevious)
                {
                    // calculate auto correlation.
                    dsum += waveData[j] * waveData[j - 1];
                    product1 += waveData[j] * waveData[j];
                    product2 += waveData[j - 1] * waveData[j - 1];
                    autoCorr = dsum / Math.Sqrt(product1 * product2);
                }
            }

            sw.WriteLine("{0} {1:F6} {2:F6}", nzero, energy, autoCorr);
        }
    }
}
/// <summary>
/// Extract lpc residual error.
/// For every frame of the lpc file whose window lies inside the wave data, one line
/// "firstLpcValue residual" is written, where residual is the frame's log energy minus
/// the log of the predicted value.
/// </summary>
/// <param name="args">
/// Arguments: wave file, lpc file, lpc error file, frame shift, frame length.
/// Frame shift and frame length are used as sample counts when indexing the wave data.
/// </param>
/// <param name="logWriter">LogWriter to implement parallel computing interface.</param>
/// <exception cref="ArgumentException">Fewer than five arguments are given.</exception>
public static void ExtractLpcResidualErrorOneFile(string[] args, TextWriter logWriter)
{
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // All five entries are read below, so all five must be present.
    if (args.Length < 5)
    {
        throw new ArgumentException("Arguments for ExtractLpcResidualErrorOneFile: input wave file, input lpc file, output lpc error file, frame shift, frame length");
    }

    string wavePath = args[0];
    string lpcFile = args[1];
    string errorFile = args[2];
    int frameShift = int.Parse(args[3], CultureInfo.InvariantCulture);
    int frameLength = int.Parse(args[4], CultureInfo.InvariantCulture);

    // load the lpc values: one frame per line, space separated.
    List<double[]> lpcData = new List<double[]>();
    foreach (string line in Helper.FileLines(lpcFile))
    {
        string[] fields = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        double[] data = fields.Select(i => double.Parse(i, CultureInfo.InvariantCulture)).ToArray();
        lpcData.Add(data);
    }

    using (StreamWriter sw = new StreamWriter(errorFile, false))
    {
        // load wave
        WaveFile waveFile = new WaveFile();
        waveFile.Load(wavePath);
        short[] waveData = ArrayHelper.BinaryConvertArray(waveFile.GetSoundData());

        // calculate residual error
        for (int i = 0; i < lpcData.Count; i++)
        {
            int pos = (i + 1) * frameShift;
            int nbegin = pos - (frameLength / 2);
            int nend = pos + (frameLength / 2);
            double energy = 0;

            // Frames whose window is not fully inside the wave data produce no output line.
            if (nend <= waveData.Length && nbegin >= 0)
            {
                // calculate actual value
                for (int j = nbegin; j < nend; j++)
                {
                    energy += waveData[j] * waveData[j];
                }

                energy = energy / (double)frameLength;
                double tempt_energy = energy;
                energy = 10 * Math.Log(Minimum + energy);

                // calculate prediction value
                // NOTE(review): lpcData[i] is indexed up to LpcOrder - 1; a shorter line in
                // the lpc file would throw here - confirm the file always has enough columns.
                double prediction = 0;
                for (int k = 0; k < LpcOrder; k++)
                {
                    double denergy = 0;
                    for (int j = nbegin; j < nend; j++)
                    {
                        // NOTE(review): "> 0" skips the valid index 0; ">= 0" may have been
                        // intended. Left unchanged because it affects the numeric output.
                        if (j - k > 0)
                        {
                            denergy += waveData[j] * waveData[j - k];
                        }
                    }

                    prediction += lpcData[i][k] * (denergy / (double)frameLength);
                }

                prediction = prediction + tempt_energy;
                prediction = 10 * Math.Log(Math.Abs(prediction) + Minimum);

                // output residual error
                sw.WriteLine("{0:F6} {1:F6}", lpcData[i][0], energy - prediction);
            }
        }
    }
}
/// <summary>
/// For every ".wav" file listed under the source directory, writes a target file that
/// consists of the affixing waveform, the source waveform and the affixing waveform again.
/// Target files that already exist are left untouched.
/// </summary>
/// <param name="sourceWaveDir">Source waveform file directory.</param>
/// <param name="targetWaveDir">Target waveform file directory.</param>
/// <param name="affixingFile">Affixing waveform file.</param>
private static void AffixWaveFiles(string sourceWaveDir, string targetWaveDir, WaveFile affixingFile)
{
    Dictionary<string, string> sourceFiles = Microsoft.Tts.Offline.FileListMap.Build(sourceWaveDir, ".wav");
    foreach (KeyValuePair<string, string> entry in sourceFiles)
    {
        string outputPath = Path.Combine(targetWaveDir, entry.Value + ".wav");
        if (!File.Exists(outputPath))
        {
            string inputPath = Path.Combine(sourceWaveDir, entry.Value + ".wav");
            Helper.EnsureFolderExistForFile(outputPath);

            WaveFile combined = new WaveFile();
            WaveFile original = new WaveFile();
            original.Load(inputPath);

            // affix + source + affix
            combined.Append(affixingFile);
            combined.Append(original);
            combined.Append(affixingFile);
            combined.Save(outputPath);
        }
    }
}
/// <summary>
/// Builds a visual waveform from a wave stream: the wave format is copied, each 16-bit
/// sample is added to the sample collection, and the Y axis is reset from those samples.
/// </summary>
/// <param name="waveStream">Stream.</param>
/// <returns>VisualWaveForm.</returns>
public static VisualWaveForm CreateFromStream(Stream waveStream)
{
    VisualWaveForm visual = new VisualWaveForm();

    WaveFile decoded = new WaveFile();
    decoded.Load(waveStream);
    visual.Format = decoded.Format;

    TransactionObservableCollection<double> target = visual.WaveSamples;
    foreach (short raw in decoded.DataIn16Bits)
    {
        double value = raw;
        target.Add(value);
    }

    visual.YAxis.Reset(target, 0);
    return visual;
}
/// <summary>
/// Loads each sentence's waveform and passes it, with the margin length, to that sentence's LoadMargin.
/// </summary>
/// <param name="waveDir">Wave directory.</param>
/// <param name="marginLength">Cross correlation margin length in millisecond.</param>
/// <exception cref="FileNotFoundException">A sentence's wave file does not exist.</exception>
public void LoadMargin(string waveDir, int marginLength)
{
    foreach (string sentenceId in _idKeyedSentences.Keys)
    {
        string wavePath = FileListMap.BuildPath(FileListMap, waveDir, sentenceId, FileExtensions.Waveform);
        bool waveExists = File.Exists(wavePath);
        if (!waveExists)
        {
            string notFound = Helper.NeutralFormat("Wave file is not found \"{0}\".", wavePath);
            throw new FileNotFoundException(notFound);
        }

        WaveFile sentenceWave = new WaveFile();
        sentenceWave.Load(wavePath);
        _idKeyedSentences[sentenceId].LoadMargin(sentenceWave, marginLength);
    }
}
/// <summary>
/// Resample the source waveform file to the requested sampling rate and save the
/// result as the target waveform file. Upsampling is rejected.
/// </summary>
/// <param name="sourceFile">Location of source waveform file.</param>
/// <param name="targetFile">Location of target waveform file.</param>
/// <param name="targetSamplesPerSecond">Samples per second of the target waveform file.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sourceFile"/> or <paramref name="targetFile"/> is null or empty.
/// </exception>
/// <exception cref="NotSupportedException">
/// The source sampling rate is lower than <paramref name="targetSamplesPerSecond"/>.
/// </exception>
public static void Resample(string sourceFile, string targetFile, int targetSamplesPerSecond)
{
    if (string.IsNullOrEmpty(sourceFile))
    {
        throw new ArgumentNullException("sourceFile");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    // Only the format is read first, so an unsupported request fails before the data is loaded.
    WaveFormat format = WaveFile.ReadFormat(sourceFile);
    if (format.SamplesPerSecond < targetSamplesPerSecond)
    {
        // The second placeholder used to read "[[1}]": a malformed format item, so the
        // target rate could never be substituted (a lone '}' is rejected by composite formatting).
        throw new NotSupportedException(Helper.NeutralFormat(
            "Resampling tool will introduce obvious aliasing " +
            "noise when upsampling from [{0}] to [{1}], refer to bug #12628",
            format.SamplesPerSecond, targetSamplesPerSecond));
    }

    WaveFile waveFile = new WaveFile();
    waveFile.Load(sourceFile);

    Resample(waveFile, targetSamplesPerSecond);

    Helper.EnsureFolderExistForFile(targetFile);
    waveFile.Save(targetFile);
}
/// <summary>
/// Creates a new WaveFile and loads it from the given path.
/// </summary>
/// <param name="filePath">Waveform file to load from.</param>
/// <returns>WaveFile.</returns>
public static WaveFile ReadWaveFile(string filePath)
{
    WaveFile loaded = new WaveFile();
    loaded.Load(filePath);

    return loaded;
}