/// <summary>
/// Loads the pre-selection data (decision forest and candidate groups) from text files
/// and verifies that the two files are consistent with each other.
/// </summary>
/// <param name="forestFile">The file name of decision forest.</param>
/// <param name="candidateGroupFile">The file name of candidate group data.</param>
/// <param name="sentenceSet">The given sentence set where to find candidates.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="forestFile"/> or <paramref name="candidateGroupFile"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">
/// A candidate group name appears more than once, a leaf node of the forest has no
/// candidate group of the same name, the candidate group ids do not match the sequence
/// 0..count-1, or the number of candidate groups differs from the number of leaf nodes.
/// </exception>
public void LoadFromText(string forestFile, string candidateGroupFile, TrainingSentenceSet sentenceSet)
{
    // Fail fast on missing file names, before any instance state is overwritten.
    if (forestFile == null)
    {
        throw new ArgumentNullException("forestFile");
    }

    if (candidateGroupFile == null)
    {
        throw new ArgumentNullException("candidateGroupFile");
    }

    _sentenceSet = sentenceSet;
    _decisionForest = new DecisionForest("pre-selection");
    _decisionForest.Load(forestFile);

    // The forest and sentence set are replaced above, so groups left over from a previous
    // load must be dropped as well; otherwise they would collide with, or be counted
    // together with, the groups read below.
    _nameIndexedCandidateGroup.Clear();

    using (StreamReader fileReader = new StreamReader(candidateGroupFile))
    {
        while (!fileReader.EndOfStream)
        {
            CandidateGroup candidateGroup = new CandidateGroup();
            candidateGroup.Load(fileReader, sentenceSet);

            // Report a repeated name as bad input data rather than letting the dictionary
            // raise a generic ArgumentException that does not mention the file.
            if (_nameIndexedCandidateGroup.ContainsKey(candidateGroup.Name))
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat("Duplicate candidate group name \"{0}\" in file \"{1}\"", candidateGroup.Name, candidateGroupFile));
            }

            _nameIndexedCandidateGroup.Add(candidateGroup.Name, candidateGroup);
        }
    }

    string mismatchMessage =
        Helper.NeutralFormat("Mismatched between file \"{0}\" and \"{1}\"", forestFile, candidateGroupFile);

    // Each leaf node must be in the candidate groups.
    int countOfLeafNodes = 0;
    foreach (DecisionTree tree in _decisionForest.TreeList)
    {
        countOfLeafNodes += tree.LeafNodeMap.Count;
        foreach (DecisionTreeNode node in tree.LeafNodeMap.Values)
        {
            if (!_nameIndexedCandidateGroup.ContainsKey(node.Name))
            {
                throw new InvalidDataException(mismatchMessage);
            }
        }
    }

    // Ensure candidate id is continuous and starts with zero.
    List<int> expected = Enumerable.Range(0, _nameIndexedCandidateGroup.Count).ToList();
    if (!Helper.Compare(expected, _nameIndexedCandidateGroup.Select(pair => pair.Value.Id).ToArray(), true))
    {
        throw new InvalidDataException("The candidate group id should be continuous and starts with zero");
    }

    // The count of candidate group must be equal to the count of leaf nodes.
    if (countOfLeafNodes != _nameIndexedCandidateGroup.Count)
    {
        throw new InvalidDataException(mismatchMessage);
    }
}