/// <summary>
/// Emits the right-hand margin that follows a candidate, when margins are enabled.
/// </summary>
/// <remarks>
/// Nothing is written when the configured margin lengths sum to zero or less, or when
/// the following candidate has a valid id.
/// </remarks>
/// <param name="waveFile">The wave file that holds the samples of the candidate's sentence.</param>
/// <param name="candidate">The candidate whose right margin is written.</param>
/// <param name="candidateInfo">The candidate information of the current candidate (not read by this method).</param>
private void WriteRightMargin(WaveFile waveFile, UnitCandidate candidate, WaveCandidateInfo candidateInfo)
{
    if (_ccMarginLength + _fsMarginLength <= 0)
    {
        // Margins are disabled.
        return;
    }

    int marginSamples = (_ccMarginLength / 2) + _fsMarginLength;

    // Start and length of the candidate in samples, rounded to the nearest sample.
    int startSample = (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);
    int sampleCount = (int)(((candidate.EndTimeInSecond - candidate.StartTimeInSecond) * waveFile.Format.SamplesPerSecond) + 0.5f);

    bool isLastCandidate = candidate.Index == candidate.Sentence.Candidates.Count - 1;
    if (isLastCandidate)
    {
        // No candidate follows, so the whole margin is zero-filled.
        WriteZeroMargin(marginSamples);
        return;
    }

    if (candidate.Sentence.Candidates[candidate.Index + 1].Id != UnitCandidate.InvalidId)
    {
        // The following candidate has a valid id: no margin is written here.
        return;
    }

    // The following candidate has the invalid id, so its samples serve as the margin.
    int marginStart = startSample + sampleCount;
    int samplesAvailable = (waveFile.GetSoundData().Length / (waveFile.Format.BitsPerSample / 8)) - marginStart;
    bool needsPadding = samplesAvailable < marginSamples;

    int samplesToCopy = needsPadding ? samplesAvailable : marginSamples;
    WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginStart, samplesToCopy));

    if (needsPadding)
    {
        // The wave data ends before the margin is complete; fill the remainder with zeros.
        WriteZeroMargin(marginSamples - samplesAvailable);
    }
}
/// <summary>
/// Emits the left-hand margin that precedes a candidate, when margins are enabled.
/// </summary>
/// <remarks>
/// Nothing is written when the configured margin lengths sum to zero or less, or when
/// the preceding candidate has a valid id.
/// </remarks>
/// <param name="waveFile">The wave file that holds the samples of the candidate's sentence.</param>
/// <param name="candidate">The candidate whose left margin is written.</param>
/// <param name="candidateInfo">The candidate information of the current candidate (not read by this method).</param>
private void WriteLeftMargin(WaveFile waveFile, UnitCandidate candidate, WaveCandidateInfo candidateInfo)
{
    if (_ccMarginLength + _fsMarginLength <= 0)
    {
        // Margins are disabled.
        return;
    }

    int marginSamples = _ccMarginLength + _fsMarginLength;

    // Start of the candidate in samples, rounded to the nearest sample.
    int startSample = (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);

    if (candidate.Index == 0)
    {
        // No candidate precedes this one, so the whole margin is zero-filled.
        WriteZeroMargin(marginSamples);
        return;
    }

    if (candidate.Sentence.Candidates[candidate.Index - 1].Id != UnitCandidate.InvalidId)
    {
        // The preceding candidate has a valid id: no margin is written here.
        return;
    }

    // The preceding candidate has the invalid id, so the samples before this one serve as the margin.
    int marginStart = startSample - marginSamples;

    // When the margin would begin before the first sample, the missing part is zero-filled first.
    int zeroPadding = marginStart < 0 ? -marginStart : 0;
    if (marginStart < 0)
    {
        WriteZeroMargin(zeroPadding);
    }

    WriteIntoInventory(ConvertsWaveDataFormat(waveFile, marginStart + zeroPadding, marginSamples - zeroPadding));
}
/// <summary>
/// Loads the unit index and candidate data section from binary reader.
/// </summary>
/// <remarks>
/// All lookups repopulated here (wave candidates, unit name to index id, unit name to
/// string pool offset) are cleared first, so loading again does not collide with
/// entries left over from a previous load.
/// </remarks>
/// <param name="reader">The given binary reader; its base stream is repositioned by this method.</param>
/// <exception cref="InvalidDataException">
/// More than two unit index entries carry <c>UnitIndexInfo.InvalidOffset</c>.
/// </exception>
private void LoadCandidates(BinaryReader reader)
{
    // Reset everything this method fills in. The name-to-string-pool-offset map is
    // populated with Add() below, so stale entries would otherwise break a reload.
    _waveCandidates.Clear();
    _namedUnitIndexId.Clear();
    _unitNameStringPoolOffsets.Clear();

    // Read the unit index table.
    reader.BaseStream.Seek(UnitIndexOffset, SeekOrigin.Begin);
    List<UnitIndexInfo> unitIndexInfos = new List<UnitIndexInfo>();
    for (int i = 0; i < UnitIndexCount; ++i)
    {
        UnitIndexInfo unitIndexInfo = new UnitIndexInfo();
        unitIndexInfo.Load(reader);
        unitIndexInfos.Add(unitIndexInfo);
    }

    // Read the candidates of every unit that has a valid offset.
    int invalidUnit = 1;
    for (int i = 0; i < unitIndexInfos.Count; ++i)
    {
        UnitIndexInfo unitIndexInfo = unitIndexInfos[i];
        if (unitIndexInfo.Offset != UnitIndexInfo.InvalidOffset)
        {
            reader.BaseStream.Seek(unitIndexInfo.Offset, SeekOrigin.Begin);

            IdKeyedWaveCandidateInfos candidates = new IdKeyedWaveCandidateInfos();
            _waveCandidates.Add(i, candidates);

            string name = _stringPool.GetString((int)unitIndexInfo.UnitNameOffset);
            _namedUnitIndexId.Add(name, i);
            _unitNameStringPoolOffsets.Add(name, unitIndexInfo.UnitNameOffset);

            // Global id of the unit's first candidate: its record index within the candidate data section.
            int globalId = (int)((unitIndexInfo.Offset - CandidateDataOffset) / WaveCandidateInfo.DataSize);
            for (int j = 0; j < unitIndexInfo.Count; ++j)
            {
                WaveCandidateInfo wci = new WaveCandidateInfo();
                wci.Load(reader);
                candidates.Add(j, wci);

                wci.Id = j;
                wci.GlobalId = globalId + j;
                wci.Name = name;
                wci.SentenceId = _stringPool.GetString((int)wci.SentenceIdOffset);
            }
        }
        else
        {
            // Entries without candidate data are named by order of appearance:
            // the first is "hpl_SIL", the second is "hpr_SIL"; a third is an error.
            string name = string.Empty;
            if (invalidUnit == 1)
            {
                name = "hpl_SIL";
            }
            else if (invalidUnit == 2)
            {
                name = "hpr_SIL";
            }
            else
            {
                throw new InvalidDataException("There are more than 2 InvalidOffset unit, the number suppose be two: hpl_SIL and hpr_SIL.");
            }

            invalidUnit++;
            _namedUnitIndexId.Add(name, i);
        }
    }
}
/// <summary>
/// Adds a sentence into wave inventory.
/// </summary>
/// <remarks>
/// The samples of every candidate in the sentence are written, including candidates
/// whose id is <c>UnitCandidate.InvalidId</c>; only candidates with a valid id are
/// registered in the unit indexing file.
/// </remarks>
/// <param name="sentence">The given sentence.</param>
/// <param name="waveFile">The corresponding wave form file.</param>
/// <exception cref="InvalidDataException">
/// A valid candidate is longer than <c>ushort.MaxValue</c> samples, or the byte position
/// derived from its global frame index differs from the current write position in the inventory.
/// </exception>
private void Add(Sentence sentence, WaveFile waveFile)
{
    Debug.Assert(
        waveFile.Format.SamplesPerSecond == _header.SamplesPerSecond &&
        waveFile.Format.Channels == 1 &&
        waveFile.Format.FormatTag == WaveFormatTag.Pcm,
        "Only supports source waveform with single channel, PCM and same sampling rate.");

    // Design note: the original design did not save the wave data of pruned candidates, but that
    // introduces a bug with the current frame shifting design. All wave data is therefore saved
    // into the inventory file, which increases the .WVE data size by about 30%. That is fine for M1.
    // More candidates will be pruned in M2, so the wave inventory creation module needs a refactor
    // to keep the disk size minimal without reintroducing the bug.
    int firstValidIndex = sentence.Candidates.Count;
    for (int candIdx = 0; candIdx < sentence.Candidates.Count; candIdx++)
    {
        UnitCandidate candidate = sentence.Candidates[candIdx];

        // Start and length of the candidate in samples, rounded to the nearest sample.
        int waveSampleOffsetInSentence = (int)((candidate.StartTimeInSecond * waveFile.Format.SamplesPerSecond) + 0.5f);
        int waveSampleLength = (int)(((candidate.EndTimeInSecond - candidate.StartTimeInSecond) * waveFile.Format.SamplesPerSecond) + 0.5f);

        if (candidate.Id != UnitCandidate.InvalidId)
        {
            if (waveSampleLength > ushort.MaxValue)
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "The wave sample length of {0}-th candidate in file {1}.wav overflows.",
                    candIdx,
                    sentence.Id));
            }

            WaveCandidateInfo candidateInfo = new WaveCandidateInfo
            {
                Name = candidate.Name,
                Id = candidate.Id,
                GlobalId = candidate.GlobalId,
                SentenceId = candidate.Sentence.Id,
                IndexOfNonSilence = (ushort)candidate.IndexOfNonSilence,
                FrameIndexInSentence = (ushort)candidate.StartFrame,
                FrameNumber = (ushort)(candidate.EndFrame - candidate.StartFrame),
                FrameIndex = (uint)(sentence.GlobalFrameIndex + candidate.StartFrame),
            };

            if (firstValidIndex > candIdx && _indexingFile.SamplePerFrame == 0)
            {
                // First valid candidate seen while the samples-per-frame value is still unset:
                // derive it from this candidate.
                firstValidIndex = candIdx;
                if (candidateInfo.FrameNumber != 0)
                {
                    _indexingFile.SamplePerFrame = (uint)(waveSampleLength / candidateInfo.FrameNumber);
                }
            }
            else
            {
                // Every other candidate is expected to agree with the recorded value (debug builds only).
                if (candidateInfo.FrameNumber != 0)
                {
                    Debug.Assert(_indexingFile.SamplePerFrame == (uint)(waveSampleLength / candidateInfo.FrameNumber));
                }
            }

            // calc left/right extensible margin, shift at most 1 units to ensure less than 1 unit.
            int leftMarginUnitIdx = Math.Max(0, candIdx - 1);
            int rightMarginUnitIdx = Math.Min(candIdx + 1, sentence.Candidates.Count - 1);
            int leftMarginFrame = candidate.StartFrame - sentence.Candidates[leftMarginUnitIdx].StartFrame;
            int rightMarginFrame = sentence.Candidates[rightMarginUnitIdx].EndFrame - candidate.EndFrame;
            Debug.Assert(leftMarginFrame >= 0 && rightMarginFrame >= 0);
            candidateInfo.LeftMarginInFrame = (byte)Math.Min(leftMarginFrame, MaxMarginInFrame);
            candidateInfo.RightMarginInFrame = (byte)Math.Min(rightMarginFrame, MaxMarginInFrame);

            // Writes the current candidate, throw exception if unit index alignment is inconsistent with wave inventory.
            // frame -> millisecond -> sample -> byte. The division by 1000 (1s == 1000ms) is done after
            // the multiplications: dividing the sampling rate first would truncate for rates that are not
            // a multiple of 1000 (e.g. 22050 Hz) and make this check fail on correct data. The 64-bit cast
            // keeps the intermediate product from wrapping.
            long candidateSamplePosition =
                ((long)candidateInfo.FrameIndex * _millisecondPerFrame * waveFile.Format.SamplesPerSecond) / 1000;
            long candidatePosition = candidateSamplePosition * _header.BytesPerSample;
            long wavePosition = _writer.BaseStream.Position - _dataOffset;
            if (candidatePosition != wavePosition)
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "Frame {0} in sentence {1} starts at {2}, which is inconsistent with position in wave inventory {3}.\r\nPossible cause: bad MLF alignment.",
                    candidateInfo.FrameIndexInSentence,
                    candidateInfo.SentenceId,
                    candidateInfo.FrameIndex,
                    wavePosition));
            }

            WriteIntoInventory(ConvertsWaveDataFormat(waveFile, waveSampleOffsetInSentence, waveSampleLength));
            _indexingFile.Add(candidateInfo);
        }
        else
        {
            // Candidates with the invalid id are not indexed, but their samples are still written.
            WriteIntoInventory(ConvertsWaveDataFormat(waveFile, waveSampleOffsetInSentence, waveSampleLength));
        }
    }
}
/// <summary>
/// Registers a wave candidate info in the unit indexing file and updates the
/// candidate count, string pool offset and size fields accordingly.
/// </summary>
/// <param name="candidateInfo">The candidate information to be written into the unit indexing file.</param>
/// <exception cref="InvalidOperationException">The name-to-unit-index map is null.</exception>
/// <exception cref="ArgumentNullException"><paramref name="candidateInfo"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// The candidate name is unknown, or a candidate with the same id already exists for that unit.
/// </exception>
public void Add(WaveCandidateInfo candidateInfo)
{
    if (_namedUnitIndexId == null)
    {
        throw new InvalidOperationException("Add() method can only be applied in object initialized with namedUnitTypeId");
    }

    if (candidateInfo == null)
    {
        throw new ArgumentNullException("candidateInfo");
    }

    string unitName = candidateInfo.Name;
    if (!_namedUnitIndexId.ContainsKey(unitName))
    {
        throw new InvalidDataException(Helper.NeutralFormat("Unknown candidate name \"{0}\"", unitName));
    }

    // Locate the per-unit candidate collection, creating it on first use.
    int unitTypeId = _namedUnitIndexId[unitName];
    if (!_waveCandidates.ContainsKey(unitTypeId))
    {
        _waveCandidates.Add(unitTypeId, new IdKeyedWaveCandidateInfos());
    }

    var candidatesOfUnit = _waveCandidates[unitTypeId];
    if (candidatesOfUnit.ContainsKey(candidateInfo.Id))
    {
        throw new InvalidDataException(Helper.NeutralFormat("Duplicated candidate id \"{0}\"", candidateInfo.Id));
    }

    candidatesOfUnit.Add(candidateInfo.Id, candidateInfo);
    candidateInfo.SentenceIdOffset = (uint)_stringPool.PutString(candidateInfo.SentenceId);

    // One more candidate record: the count grows, and both the string pool offset
    // and the overall data size grow by the size of one record.
    CandidateCount++;
    StringPoolOffset += WaveCandidateInfo.DataSize;
    DataSize += WaveCandidateInfo.DataSize;

    // The string pool may or may not have grown, so its previous size is taken out
    // of the data size and the current size is put back in.
    DataSize -= StringPoolSize;
    StringPoolSize = (uint)_stringPool.Length;
    DataSize += StringPoolSize;
}