/// <summary>
/// Loads the pre-selection data from text files: the decision forest from
/// <paramref name="forestFile"/> and the candidate groups from
/// <paramref name="candidateGroupFile"/>, then cross-checks the two.
/// </summary>
/// <remarks>
/// Loaded candidate groups are added to the existing name-indexed map; the map is not
/// cleared first. NOTE(review): presumably this is only called on a freshly created
/// instance - confirm against callers.
/// </remarks>
/// <param name="forestFile">The file name of decision forest.</param>
/// <param name="candidateGroupFile">The file name of candidate group data.</param>
/// <param name="sentenceSet">The given sentence set where to find candidates.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
/// <exception cref="InvalidDataException">
/// A leaf node of the forest has no candidate group with the same name, the candidate
/// group ids do not match 0..count-1, or the number of candidate groups differs from the
/// number of leaf nodes.
/// </exception>
public void LoadFromText(string forestFile, string candidateGroupFile, TrainingSentenceSet sentenceSet)
{
    // Validate up front (same style as the constructor) so that a bad argument
    // cannot leave the instance with half-updated fields.
    if (forestFile == null)
    {
        throw new ArgumentNullException("forestFile");
    }

    if (candidateGroupFile == null)
    {
        throw new ArgumentNullException("candidateGroupFile");
    }

    if (sentenceSet == null)
    {
        throw new ArgumentNullException("sentenceSet");
    }

    _sentenceSet = sentenceSet;
    _decisionForest = new DecisionForest("pre-selection");
    _decisionForest.Load(forestFile);

    // Read candidate groups one after another until the file is exhausted.
    using (StreamReader fileReader = new StreamReader(candidateGroupFile))
    {
        while (!fileReader.EndOfStream)
        {
            CandidateGroup candidateGroup = new CandidateGroup();
            candidateGroup.Load(fileReader, sentenceSet);
            _nameIndexedCandidateGroup.Add(candidateGroup.Name, candidateGroup);
        }
    }

    // Each leaf node must be in the candidate groups.
    int countOfLeafNodes = 0;
    foreach (DecisionTree tree in _decisionForest.TreeList)
    {
        countOfLeafNodes += tree.LeafNodeMap.Count;
        foreach (DecisionTreeNode node in tree.LeafNodeMap.Values)
        {
            if (!_nameIndexedCandidateGroup.ContainsKey(node.Name))
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat("Mismatched between file \"{0}\" and \"{1}\"", forestFile, candidateGroupFile));
            }
        }
    }

    // Ensure candidate id is continuous and starts with zero.
    // The last argument of Helper.Compare is presumably "ignore order" - TODO confirm.
    List<int> expected = Enumerable.Range(0, _nameIndexedCandidateGroup.Count).ToList();
    if (!Helper.Compare(expected, _nameIndexedCandidateGroup.Select(pair => pair.Value.Id).ToArray(), true))
    {
        throw new InvalidDataException("The candidate group id should be continuous and starts with zero");
    }

    // The count of candidate group must be equal to the count of leaf nodes.
    if (countOfLeafNodes != _nameIndexedCandidateGroup.Count)
    {
        throw new InvalidDataException(
            Helper.NeutralFormat("Mismatched between file \"{0}\" and \"{1}\"", forestFile, candidateGroupFile));
    }
}
/// <summary>
/// Initializes a new instance of the PreSelectionData class according to given forest and sentenceSet.
/// One candidate group is created per leaf node of the forest, and every non-silence
/// candidate of the sentence set is put into the group of the leaf node it is filtered to.
/// </summary>
/// <remarks>
/// The feature name set of each non-silence candidate's label is overwritten with
/// <paramref name="fullFeatureNameSet"/> before the candidate is filtered through its tree.
/// </remarks>
/// <param name="forest">The given forest.</param>
/// <param name="sentenceSet">The given sentence set where to find candidates.</param>
/// <param name="fullFeatureNameSet">The full feature set to parse tree.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
/// <exception cref="InvalidDataException">
/// No tree of the forest is linked to a candidate, no leaf node can be found for a
/// candidate, or a candidate group ends up without any candidate.
/// </exception>
public PreSelectionData(DecisionForest forest, TrainingSentenceSet sentenceSet, LabelFeatureNameSet fullFeatureNameSet)
{
    if (forest == null)
    {
        throw new ArgumentNullException("forest");
    }

    if (sentenceSet == null)
    {
        throw new ArgumentNullException("sentenceSet");
    }

    if (fullFeatureNameSet == null)
    {
        throw new ArgumentNullException("fullFeatureNameSet");
    }

    _decisionForest = forest;
    _sentenceSet = sentenceSet;
    _nameIndexedCandidateGroup = new Dictionary<string, CandidateGroup>();

    // Create empty candidate group. Ids are assigned in creation order, so they are
    // continuous and start with zero.
    foreach (DecisionTree tree in forest.TreeList)
    {
        foreach (DecisionTreeNode node in tree.LeafNodeMap.Values)
        {
            CandidateGroup candidateGroup = new CandidateGroup
            {
                Name = node.Name,
                Id = _nameIndexedCandidateGroup.Count
            };

            _nameIndexedCandidateGroup.Add(candidateGroup.Name, candidateGroup);
        }
    }

    // Travel the training sentence set to find the corresponding candidates.
    foreach (Sentence sentence in sentenceSet.Sentences.Values)
    {
        foreach (UnitCandidate candidate in sentence.Candidates)
        {
            if (candidate.SilenceCandidate)
            {
                continue;
            }

            candidate.Label.FeatureNameSet = fullFeatureNameSet;

            DecisionTree[] linkedDecisionTrees = forest.TreeList.Where(t => t.Name == candidate.Name).ToArray();

            // Debug.Assert is compiled out of release builds, so a missing tree must be
            // reported explicitly instead of failing on the [0] access below.
            if (linkedDecisionTrees.Length == 0)
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat(
                        "Cannot find pre-selection tree linked to unit \"{0}\" in sentence \"{1}\"",
                        candidate.Name,
                        sentence.Id));
            }

            // More than one linked tree stays a debug-only check; release builds keep
            // using the first match, as before.
            Debug.Assert(
                linkedDecisionTrees.Length == 1,
                Helper.NeutralFormat(
                    "Invalidated: More than 1 {0} Preselection tree are linked to unit {1}",
                    linkedDecisionTrees.Length,
                    candidate.Name));

            DecisionTreeNode leafNode = DecisionForestExtension.FilterTree(
                linkedDecisionTrees[0].NodeList[0],
                forest.Questions,
                candidate.Label);

            // Same reasoning as above: fail with context rather than with a
            // NullReferenceException on leafNode.Name.
            if (leafNode == null)
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat(
                        "cannot find leaf node for candidate {0} in sentence {1}",
                        candidate.Name,
                        sentence.Id));
            }

            _nameIndexedCandidateGroup[leafNode.Name].Candidates.Add(candidate);
        }
    }

    // Verify there is no empty candidate group.
    foreach (CandidateGroup candidateGroup in _nameIndexedCandidateGroup.Values)
    {
        if (candidateGroup.Candidates.Count <= 0)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("There is no candidate in candidate group \"{0}\"", candidateGroup.Name));
        }
    }
}
/// <summary>
/// Saves the pre-selection data as text: the decision forest to
/// <paramref name="forestFile"/> and the candidate groups, ordered by id, to
/// <paramref name="candidateGroupFile"/> (ASCII encoded, overwriting any existing file).
/// </summary>
/// <param name="forestFile">The file name of decision forest.</param>
/// <param name="candidateGroupFile">The file name of candidate group data.</param>
/// <exception cref="InvalidDataException">
/// The candidate group ids are not exactly 0..count-1 (an id is out of range or duplicated).
/// </exception>
public void SaveAsText(string forestFile, string candidateGroupFile)
{
    // Place every group at the slot given by its id BEFORE touching any file, so that
    // inconsistent ids are reported clearly and do not leave a written forest file next
    // to a truncated candidate group file.
    CandidateGroup[] groups = new CandidateGroup[_nameIndexedCandidateGroup.Count];
    bool[] slotTaken = new bool[groups.Length];
    foreach (CandidateGroup candidateGroup in _nameIndexedCandidateGroup.Values)
    {
        int id = candidateGroup.Id;
        if (id < 0 || id >= groups.Length || slotTaken[id])
        {
            throw new InvalidDataException("The candidate group id should be continuous and starts with zero");
        }

        groups[id] = candidateGroup;
        slotTaken[id] = true;
    }

    _decisionForest.Save(forestFile);

    using (StreamWriter fileWriter = new StreamWriter(candidateGroupFile, false, Encoding.ASCII))
    {
        foreach (CandidateGroup group in groups)
        {
            group.Save(fileWriter);
        }
    }
}
/// <summary>
/// Initializes a new instance of the PreselectionNodeData class that wraps the given candidate group.
/// </summary>
/// <param name="candidateGroup">The candidate group to be wrapped; it is stored as given, without validation.</param>
public PreselectionNodeData(CandidateGroup candidateGroup)
{
    this._candidateGroup = candidateGroup;
}