/// <summary>
/// Extracts the features of the given utterance and packs them into a sentence
/// that holds one phone segment (with its label) per phone of the utterance.
/// </summary>
/// <param name="sentId">
/// Sentence id. It is assigned to the returned sentence and quoted in error messages.
/// </param>
/// <param name="utterance">
/// Service Provider utterance object.
/// </param>
/// <returns>
/// The sentence contains all the features.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when the extraction engine raises an <see cref="EspException"/>, when the
/// number of returned vectors differs from the number of feature metas, or when the
/// length of a vector differs from the number of phones in the utterance.
/// </exception>
public Sentence Extract(string sentId, TtsUtterance utterance)
{
    List<FeatureVector> vectors;
    try
    {
        // Extracts one vector per configured feature meta.
        vectors = ExtractionEngine.Extract(utterance, FeatureMetas);
    }
    catch (EspException e)
    {
        throw new InvalidDataException(
            Helper.NeutralFormat("Extract feature error on sentence \"{0}\"", sentId), e);
    }

    // Validates the extracted vectors: one vector per feature meta...
    if (vectors.Count != FeatureMetas.Count)
    {
        throw new InvalidDataException(
            Helper.NeutralFormat("Length of result is mismatch on sentence \"{0}\"", sentId));
    }

    // ...and one value per phone in every vector.
    var phoneCount = utterance.Phones.Count;
    for (int i = 0; i < vectors.Count; i++)
    {
        if (vectors[i].Count != phoneCount)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("Length of vector is mismatch on sentence \"{0}\"", sentId));
        }
    }

    // Creates a sentence to store all the features.
    Sentence sentence = new Sentence { Id = sentId };

    // Iterate over the phones rather than over vectors[0]: the validation above
    // guarantees every vector has exactly phoneCount entries, and indexing
    // vectors[0] would throw ArgumentOutOfRangeException when no feature is configured.
    for (int i = 0; i < phoneCount; ++i)
    {
        // The lambda below is evaluated immediately by ToArray(), but a dedicated
        // local keeps the captured index independent from the loop counter.
        int phoneIndex = i;

        // Create candidates for each phoneme. The leading
        // LabelFeatureNameSet.MandatoryFeatureNames.Length values are skipped.
        PhoneSegment p = new PhoneSegment
        {
            Sentence = sentence,
            Index = phoneIndex,
            Features = vectors.Select(v => v[phoneIndex])
                .Skip(LabelFeatureNameSet.MandatoryFeatureNames.Length).ToArray(),
        };

        // Create the label to store the features.
        Label label = new Label(FeatureNameSet);
        for (int j = 0; j < vectors.Count; ++j)
        {
            var value = vectors[j][phoneIndex];

            if (value.ValueType == FeatureValueType.FEATURE_VALUE_TYPE_UNKOWN)
            {
                // Unknown values are stored as "not applicable".
                label.SetFeatureValue(FeatureNameSet.FeatureNames[j], Label.NotApplicableFeatureValue);
            }
            else if (FeatureMetas[j].Property == TtsFeatureProperty.TTS_FEATURE_PROPERTY_PHONE_ID)
            {
                // Phone ids are stored by phone name, converted through Phoneme.ToHtk.
                Phone phone = PhoneSet.GetPhone(value.IntValue);
                label.SetFeatureValue(FeatureNameSet.FeatureNames[j], Offline.Phoneme.ToHtk(phone.Name));
            }
            else
            {
                // Everything else is stored as a culture-invariant integer string.
                label.SetFeatureValue(
                    FeatureNameSet.FeatureNames[j],
                    value.IntValue.ToString(CultureInfo.InvariantCulture));
            }

            // Updates the corresponding value records.
            FeatureValueRecords[j].Update(value);
        }

        p.Label = label;
        sentence.PhoneSegments.Add(p);
    }

    return sentence;
}
/// <summary>
/// Initializes a new instance of the <see cref="PhoneCandidate"/> class
/// by copying the label, name, neighbouring phonemes, timing and indexes
/// from the given phone segment.
/// </summary>
/// <param name="phoneme">Phone segment link.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="phoneme"/> is null.
/// </exception>
public PhoneCandidate(PhoneSegment phoneme)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // raised from the first property access below.
    if (phoneme == null)
    {
        throw new System.ArgumentNullException("phoneme");
    }

    Type = UnitCandidateType.Phone;
    Id = InvalidId;
    Label = phoneme.Label;
    Name = phoneme.Name;
    LeftPhoneme = phoneme.LeftPhoneme;
    RightPhoneme = phoneme.RightPhoneme;
    StartTime = phoneme.StartTime;
    EndTime = phoneme.EndTime;
    Index = phoneme.Index;
    IndexOfNonSilence = phoneme.IndexOfNonSilence;
    MustHold = false;
    PhoneSegmentLink = phoneme;
    Sentence = phoneme.Sentence;

    // A candidate is a silence candidate when its name equals the silence phone's converted name.
    SilenceCandidate = phoneme.Name == Phoneme.ToHtk(Phoneme.SilencePhone);
}
/// <summary>
/// Initializes a new instance of the <see cref="HalfPhoneCandidate"/> class
/// as the left or right half of the given phone segment. The phone is split at a
/// time derived from the midpoint of its state alignment at index 2.
/// </summary>
/// <param name="phoneme">Phone segment link.</param>
/// <param name="isLeftHalfPhone">Flag to left or right half phone.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown when <paramref name="phoneme"/> is null.
/// </exception>
public HalfPhoneCandidate(PhoneSegment phoneme, bool isLeftHalfPhone)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // raised from the first property access below.
    if (phoneme == null)
    {
        throw new System.ArgumentNullException("phoneme");
    }

    Type = UnitCandidateType.Halfphone;
    Id = InvalidId;
    Label = phoneme.Label;

    // bug #93090, TBD: Offline should call serviceProvider to get half phone name and boundary
    IsLeftHalfPhone = isLeftHalfPhone;
    Name = isLeftHalfPhone ? "hpl_" + phoneme.Name : "hpr_" + phoneme.Name;

    // The split point is the midpoint of the state alignment at index 2, expressed
    // as a multiple of 50000 time units.
    // NOTE(review): the bias added before truncation is 0.6 rather than the usual 0.5,
    // and index 2 presumably is the middle state of the model - confirm both are intended.
    var middleState = phoneme.StateAlignments[2];
    int middleTime = (int)(((middleState.StartTime + middleState.EndTime) / 2 / 50000.0) + 0.6) * 50000;

    StartTime = isLeftHalfPhone ? phoneme.StartTime : middleTime;
    EndTime = isLeftHalfPhone ? middleTime : phoneme.EndTime;
    LeftPhoneme = phoneme.LeftPhoneme;
    RightPhoneme = phoneme.RightPhoneme;

    // Each phone yields two half phones: the left one at 2 * index, the right one at 2 * index + 1.
    Index = isLeftHalfPhone ? phoneme.Index * 2 : (phoneme.Index * 2) + 1;

    // NOTE(review): a phone segment whose IndexOfNonSilence is -1 yields -2 (left) or
    // -1 (right) here - confirm consumers only test for a negative value.
    IndexOfNonSilence = isLeftHalfPhone ? phoneme.IndexOfNonSilence * 2 : (phoneme.IndexOfNonSilence * 2) + 1;
    MustHold = false;
    PhoneSegmentLink = phoneme;
    Sentence = phoneme.Sentence;

    // A candidate is a silence candidate when its phone's name equals the silence phone's converted name.
    SilenceCandidate = phoneme.Name == Phoneme.ToHtk(Phoneme.SilencePhone);
}
/// <summary>
/// Reads the lines of one sentence from the master label file and feeds them
/// to the phone segments of this sentence.
/// This method may be invoked several times on the same sentence, each time with a
/// different master label file (full-context label, alignment data and so on):
/// the first call creates the phone segments, later calls reload the existing ones.
/// </summary>
/// <param name="reader">StreamReader of master label file.</param>
/// <returns>A bool value indicates whether end of sentence exists.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when reloading a phone segment fails with an <see cref="InvalidDataException"/>,
/// or when the lines are not consumed exactly.
/// </exception>
public bool Load(StreamReader reader)
{
    const string MismatchMessage = "Mismatched data between multi master label files";

    // Collect every line that belongs to this sentence.
    bool sawSentenceEnd = false;
    List<string> sentenceLines = LoadLines(reader, ref sawSentenceEnd);
    int cursor = 0;

    if (_phoneSegments.Count != 0)
    {
        // Segments already exist: let each one pick up its additional information.
        try
        {
            foreach (PhoneSegment existing in _phoneSegments)
            {
                existing.Load(sentenceLines, ref cursor);
            }
        }
        catch (InvalidDataException e)
        {
            throw new InvalidDataException(MismatchMessage, e);
        }
    }
    else
    {
        // First load: build one segment after another until the lines are used up.
        int nonSilenceCount = 0;
        for (int position = 0; cursor < sentenceLines.Count; ++position)
        {
            PhoneSegment segment = new PhoneSegment();
            segment.Load(sentenceLines, ref cursor);
            segment.Index = position;

            // Silence segments get -1; the others are numbered consecutively.
            if (Phoneme.IsSilenceFeature(segment.Name))
            {
                segment.IndexOfNonSilence = -1;
            }
            else
            {
                segment.IndexOfNonSilence = nonSilenceCount;
                nonSilenceCount++;
            }

            segment.Sentence = this;
            _phoneSegments.Add(segment);
        }
    }

    // Every line must have been consumed, no more and no less.
    if (cursor != sentenceLines.Count)
    {
        throw new InvalidDataException(MismatchMessage);
    }

    return sawSentenceEnd;
}