/// <summary>
/// Splits the given label text into feature values and stores them, together with the text itself.
/// </summary>
/// <param name="text">The label text to split on <c>LabelFeatureNameSet.SeparatorChars</c>.</param>
/// <exception cref="InvalidDataException">
/// The text yields exactly two feature values, or the number of values does not fit the current feature name set.
/// </exception>
private void UpdateFields(string text)
{
    char[] separators = LabelFeatureNameSet.SeparatorChars.ToCharArray();
    string[] values = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    // Exactly two values is never a valid label.
    if (values.Length == 2)
    {
        throw new InvalidDataException(Helper.NeutralFormat("Invalid Htk label \"{0}\"", text));
    }

    if (values.Length == 1)
    {
        // Only the central phoneme is present, so the label switches to the mono-label name set.
        _featureNames = LabelFeatureNameSet.MonoLabel;
    }
    else
    {
        // A value count that differs from the name count is tolerated only while the
        // current name set has the same number of names as the default set.
        bool acceptable = _featureNames.Count == LabelFeatureNameSet.Default.Count
            || values.Length == _featureNames.Count;
        if (!acceptable)
        {
            throw new InvalidDataException("Unmatched feature value and feature name");
        }
    }

    _featureValues = values;
    _text = text;
}
/// <summary>
/// Initializes a new instance of the PreSelectionData class according to given forest and sentenceSet.
/// </summary>
/// <remarks>
/// One empty candidate group is created per leaf node of every tree in the forest. Every non-silence
/// candidate of the sentence set is then routed through the tree whose name equals the candidate name
/// and added to the group of the leaf node it reaches. As a side effect, the label of every
/// non-silence candidate has its FeatureNameSet replaced by <paramref name="fullFeatureNameSet"/>.
/// </remarks>
/// <param name="forest">The given forest.</param>
/// <param name="sentenceSet">The given sentence set where to find candidates.</param>
/// <param name="fullFeatureNameSet">The full feature set to parse tree.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
/// <exception cref="InvalidDataException">
/// A candidate has no linked tree, a candidate cannot be mapped to a leaf node,
/// or a candidate group ends up without any candidate.
/// </exception>
public PreSelectionData(DecisionForest forest, TrainingSentenceSet sentenceSet, LabelFeatureNameSet fullFeatureNameSet)
{
    if (forest == null)
    {
        throw new ArgumentNullException("forest");
    }

    if (sentenceSet == null)
    {
        throw new ArgumentNullException("sentenceSet");
    }

    if (fullFeatureNameSet == null)
    {
        throw new ArgumentNullException("fullFeatureNameSet");
    }

    _decisionForest = forest;
    _sentenceSet = sentenceSet;
    _nameIndexedCandidateGroup = new Dictionary<string, CandidateGroup>();

    // Create empty candidate group. The group id is its insertion order.
    foreach (DecisionTree tree in forest.TreeList)
    {
        foreach (DecisionTreeNode node in tree.LeafNodeMap.Values)
        {
            CandidateGroup candidateGroup = new CandidateGroup
            {
                Name = node.Name,
                Id = _nameIndexedCandidateGroup.Count
            };

            _nameIndexedCandidateGroup.Add(candidateGroup.Name, candidateGroup);
        }
    }

    // Travel the training sentence set to find the corresponding candidates.
    foreach (Sentence sentence in sentenceSet.Sentences.Values)
    {
        foreach (UnitCandidate candidate in sentence.Candidates)
        {
            if (candidate.SilenceCandidate)
            {
                continue;
            }

            candidate.Label.FeatureNameSet = fullFeatureNameSet;

            DecisionTree[] linkedDecisionTrees = forest.TreeList.Where(t => t.Name == candidate.Name).ToArray();

            // Debug.Assert is compiled out of release builds, so the "no tree" case must be
            // checked explicitly; otherwise indexing below fails with an opaque IndexOutOfRangeException.
            if (linkedDecisionTrees.Length == 0)
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat("No Preselection tree is linked to unit {0}", candidate.Name));
            }

            Debug.Assert(
                linkedDecisionTrees.Length == 1,
                Helper.NeutralFormat("Invalidated: More than 1 {0} Preselection tree are linked to unit {1}", linkedDecisionTrees.Length, candidate.Name));

            DecisionTreeNode leafNode = DecisionForestExtension.FilterTree(linkedDecisionTrees[0].NodeList[0], forest.Questions, candidate.Label);

            // Same reasoning as above: fail with a meaningful message instead of a NullReferenceException.
            if (leafNode == null)
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat("cannot find leaf node for candidate {0} in sentence {1}", candidate.Name, sentence.Id));
            }

            _nameIndexedCandidateGroup[leafNode.Name].Candidates.Add(candidate);
        }
    }

    // Verify there is no empty candidate group.
    foreach (CandidateGroup candidateGroup in _nameIndexedCandidateGroup.Values)
    {
        if (candidateGroup.Candidates.Count <= 0)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("There is no candidate in candidate group \"{0}\"", candidateGroup.Name));
        }
    }
}
/// <summary>
/// Switches this label to the given feature name set and resizes the stored feature values to match its size.
/// </summary>
/// <param name="set">The feature name set to adopt; its Count becomes the new number of feature values.</param>
public void ResizeFeatureValue(LabelFeatureNameSet set)
{
    _featureNames = set;

    // No feature values are held yet, so there is nothing to resize.
    if (_featureValues == null)
    {
        return;
    }

    int previousLength = _featureValues.Length;
    Array.Resize(ref _featureValues, set.Count);

    // Slots that did not exist before the resize are marked as not applicable.
    for (int index = previousLength; index < _featureValues.Length; index++)
    {
        _featureValues[index] = NotApplicableFeatureValue;
    }

    // The stored text no longer matches the values, so drop it.
    _text = null;
}
/// <summary>
/// Initializes a new instance of the Label class as a copy of the given one.
/// </summary>
/// <remarks>
/// The new label uses the same feature name set object as the source label and is then
/// populated by assigning the source label's Text to its own Text property.
/// </remarks>
/// <param name="label">The given label to copy.</param>
/// <exception cref="ArgumentNullException"><paramref name="label"/> is null.</exception>
public Label(Label label)
{
    // Fail with a descriptive argument exception instead of a NullReferenceException.
    if (label == null)
    {
        throw new ArgumentNullException("label");
    }

    // The name set must be in place before Text is assigned.
    // NOTE(review): presumably the Text setter validates against _featureNames - confirm.
    _featureNames = label._featureNames;
    Text = label.Text;
}
/// <summary>
/// Initializes a new instance of the Label class that uses the supplied feature name set.
/// </summary>
/// <param name="featureNams">The feature name set for this label; it is stored as given, without validation.</param>
public Label(LabelFeatureNameSet featureNams)
{
    this._featureNames = featureNams;
}
/// <summary>
/// Creates a new feature name set and registers it under the given name.
/// </summary>
/// <remarks>
/// Feature names are indexed in the order given, continuing after whatever entries a freshly
/// constructed set already contains. A name that is already present in the set is skipped
/// and does not consume an index.
/// </remarks>
/// <param name="setName">The user specified name of this set.</param>
/// <param name="featureNames">The feature names in order.</param>
/// <returns>The created feature name set.</returns>
/// <exception cref="ArgumentNullException"><paramref name="setName"/> or <paramref name="featureNames"/> is null.</exception>
/// <exception cref="InvalidOperationException">A set named <paramref name="setName"/> is already registered.</exception>
/// <exception cref="InvalidDataException">The resulting set holds more features than there are separator characters.</exception>
public static LabelFeatureNameSet Create(string setName, IList<string> featureNames)
{
    // Validate up front so that bad arguments fail before any set is allocated.
    if (setName == null)
    {
        throw new ArgumentNullException("setName");
    }

    if (featureNames == null)
    {
        throw new ArgumentNullException("featureNames");
    }

    if (NamedSet.ContainsKey(setName))
    {
        throw new InvalidOperationException(Helper.NeutralFormat("This is already a feature set named \"{0}\" exist", setName));
    }

    // Create a LabelFeatureNameSet which contains the mandatory feature name already.
    LabelFeatureNameSet createdSet = new LabelFeatureNameSet();

    // Add the feature names one by one, numbering them after the entries already present.
    int index = createdSet._featureNameToIndex.Count;
    foreach (string featureName in featureNames)
    {
        if (!createdSet._featureNameToIndex.ContainsKey(featureName))
        {
            createdSet._featureNameToIndex.Add(featureName, index++);
        }
    }

    // The number of features must not exceed the number of separator characters.
    if (createdSet._featureNameToIndex.Count > SeparatorChars.Length)
    {
        throw new InvalidDataException(Helper.NeutralFormat("The number of feature is too many for storage : {0}", createdSet._featureNameToIndex.Count));
    }

    // Register only after every check has passed.
    NamedSet.Add(setName, createdSet);
    return createdSet;
}
/// <summary>
/// Parses a string and returns a LabelLine instance using the given LabelFeatureNameSet.
/// </summary>
/// <remarks>
/// After splitting on <c>SplitterChars</c> (empty entries removed) the line must contain either a
/// single part, the label text, or at least three parts: segment start, segment end and label text,
/// with any further parts kept in <c>Remaining</c>. A label text ending in <c>]</c> carries a
/// state suffix of the form <c>[state]</c>, which is stripped from the text and stored in
/// <c>State</c>; otherwise <c>State</c> stays at -1.
/// </remarks>
/// <param name="value">The given string to be parsed.</param>
/// <param name="featureNames">The given feature names.</param>
/// <returns>The parsed LabelLine.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// The line has zero or two parts, or the label text ends with <c>]</c> without a matching <c>[</c>.
/// </exception>
/// <exception cref="FormatException">A segment boundary or the state is not a valid integer.</exception>
public static LabelLine Parse(string value, LabelFeatureNameSet featureNames)
{
    if (value == null)
    {
        throw new ArgumentNullException("value");
    }

    LabelLine labelLine = new LabelLine
    {
        // Initialize state as a negative value.
        State = -1,
    };

    string labelText;
    string[] parts = value.Split(SplitterChars, StringSplitOptions.RemoveEmptyEntries);
    switch (parts.Length)
    {
        case 0:
            // Empty or separator-only input: report it as unsupported data rather than
            // failing later with an IndexOutOfRangeException.
        case 2:
            // Invalid currently.
            throw new InvalidDataException(Helper.NeutralFormat("Unsupported data \"{0}\"", value));
        case 1:
            labelText = parts[0];
            break;
        default:
            // The file content is machine-readable, so parse numbers culture-independently.
            labelLine.Segment = new Segment(
                long.Parse(parts[0], System.Globalization.CultureInfo.InvariantCulture),
                long.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture));
            labelText = parts[2];

            // Keep the remaining values.
            if (parts.Length > 3)
            {
                labelLine.Remaining = new string[parts.Length - 3];
                Array.Copy(parts, 3, labelLine.Remaining, 0, labelLine.Remaining.Length);
            }

            break;
    }

    labelLine.Label = new Label(featureNames);

    // Check there is state information or not. labelText is never empty here because
    // empty entries were removed by the split.
    if (labelText[labelText.Length - 1] == ']')
    {
        // The state info is appended to the label text as: a-b+c+x...x[state]
        // Find the previous "["
        int index = labelText.LastIndexOf('[');
        if (index < 0)
        {
            throw new InvalidDataException(Helper.NeutralFormat("Unsupport format \"{0}\"", labelText));
        }

        labelLine.Label.Text = labelText.Substring(0, index);

        // Take the characters between '[' and the trailing ']'.
        labelLine.State = int.Parse(
            labelText.Substring(index + 1, labelText.Length - index - 2),
            System.Globalization.CultureInfo.InvariantCulture);
    }
    else
    {
        labelLine.Label.Text = labelText;
    }

    return labelLine;
}