Пример #1
0
        /// <summary>
        /// Reads the pre-selection data (decision forest plus candidate groups) from text files
        /// and cross-checks the two files against each other.
        /// </summary>
        /// <param name="forestFile">Path of the text file holding the decision forest.</param>
        /// <param name="candidateGroupFile">Path of the text file holding the candidate groups.</param>
        /// <param name="sentenceSet">Sentence set handed to every candidate group while it is loaded.</param>
        /// <exception cref="InvalidDataException">
        /// Thrown when a leaf node has no candidate group with the same name, when the candidate
        /// group ids fail the id check, or when the number of candidate groups differs from the
        /// total number of leaf nodes.
        /// </exception>
        public void LoadFromText(string forestFile, string candidateGroupFile, TrainingSentenceSet sentenceSet)
        {
            _sentenceSet = sentenceSet;
            _decisionForest = new DecisionForest("pre-selection");
            _decisionForest.Load(forestFile);

            // Read candidate groups back-to-back until the reader reports end of stream.
            using (StreamReader reader = new StreamReader(candidateGroupFile))
            {
                while (!reader.EndOfStream)
                {
                    CandidateGroup loadedGroup = new CandidateGroup();
                    loadedGroup.Load(reader, sentenceSet);
                    _nameIndexedCandidateGroup.Add(loadedGroup.Name, loadedGroup);
                }
            }

            // Every leaf node of every tree needs a candidate group registered under its name.
            int leafNodeTotal = 0;
            foreach (DecisionTree tree in _decisionForest.TreeList)
            {
                leafNodeTotal += tree.LeafNodeMap.Count;
                foreach (DecisionTreeNode leaf in tree.LeafNodeMap.Values)
                {
                    if (_nameIndexedCandidateGroup.ContainsKey(leaf.Name))
                    {
                        continue;
                    }

                    throw new InvalidDataException(
                        Helper.NeutralFormat("Mismatched between file \"{0}\" and \"{1}\"", forestFile, candidateGroupFile));
                }
            }

            // The group ids are expected to be exactly 0 .. Count - 1.
            // NOTE(review): the meaning of the last Helper.Compare argument is not visible here —
            // presumably it makes the comparison order-insensitive; confirm against Helper.
            List<int> expectedIds = Enumerable.Range(0, _nameIndexedCandidateGroup.Count).ToList();
            int[] actualIds = _nameIndexedCandidateGroup.Select(pair => pair.Value.Id).ToArray();
            bool idsAreValid = Helper.Compare(expectedIds, actualIds, true);
            if (!idsAreValid)
            {
                throw new InvalidDataException("The candidate group id should be continuous and starts with zero");
            }

            // There must be exactly as many candidate groups as there are leaf nodes.
            if (leafNodeTotal != _nameIndexedCandidateGroup.Count)
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat("Mismatched between file \"{0}\" and \"{1}\"", forestFile, candidateGroupFile));
            }
        }
Пример #2
0
        /// <summary>
        /// Initializes a new instance of the PreSelectionData class according to given forest and sentenceSet.
        /// One candidate group is created per leaf node of the forest, and every non-silence
        /// candidate of the sentence set is placed into the group of the leaf node it is filtered to.
        /// </summary>
        /// <param name="forest">The given forest.</param>
        /// <param name="sentenceSet">The given sentence set where to find candidates.</param>
        /// <param name="fullFeatureNameSet">The full feature set to parse tree.</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        /// <exception cref="InvalidDataException">
        /// Thrown when a candidate has no tree with a matching name, when no leaf node can be
        /// found for a candidate, or when a candidate group ends up without any candidate.
        /// </exception>
        public PreSelectionData(DecisionForest forest, TrainingSentenceSet sentenceSet, LabelFeatureNameSet fullFeatureNameSet)
        {
            if (forest == null)
            {
                throw new ArgumentNullException("forest");
            }

            if (sentenceSet == null)
            {
                throw new ArgumentNullException("sentenceSet");
            }

            if (fullFeatureNameSet == null)
            {
                throw new ArgumentNullException("fullFeatureNameSet");
            }

            _decisionForest = forest;
            _sentenceSet = sentenceSet;
            _nameIndexedCandidateGroup = new Dictionary<string, CandidateGroup>();

            // Create one empty candidate group per leaf node; ids are assigned in creation order,
            // so they run from zero upwards without gaps.
            foreach (DecisionTree tree in forest.TreeList)
            {
                foreach (DecisionTreeNode node in tree.LeafNodeMap.Values)
                {
                    CandidateGroup candidateGroup = new CandidateGroup
                    {
                        Name = node.Name,
                        Id = _nameIndexedCandidateGroup.Count
                    };

                    _nameIndexedCandidateGroup.Add(candidateGroup.Name, candidateGroup);
                }
            }

            // Travel the training sentence set to find the corresponding candidates.
            foreach (Sentence sentence in sentenceSet.Sentences.Values)
            {
                foreach (UnitCandidate candidate in sentence.Candidates)
                {
                    if (candidate.SilenceCandidate)
                    {
                        // Silence candidates are not assigned to any candidate group.
                        continue;
                    }

                    candidate.Label.FeatureNameSet = fullFeatureNameSet;
                    DecisionTree[] linkedDecisionTrees = forest.TreeList.Where(t => t.Name == candidate.Name).ToArray();

                    // Debug.Assert is compiled out of release builds, so a missing tree must be
                    // reported explicitly; otherwise indexing [0] below fails with an
                    // IndexOutOfRangeException that says nothing about the offending unit.
                    if (linkedDecisionTrees.Length == 0)
                    {
                        throw new InvalidDataException(
                            Helper.NeutralFormat("No Preselection tree is linked to unit {0} in sentence {1}", candidate.Name, sentence.Id));
                    }

                    // More than one matching tree is still only flagged in debug builds; the
                    // first match is used, exactly as before.
                    Debug.Assert(linkedDecisionTrees.Length == 1,
                        Helper.NeutralFormat("Invalidated: More than 1 {0} Preselection tree are linked to unit {1}", linkedDecisionTrees.Length, candidate.Name));

                    // Filter from the first node of the tree's node list (presumably the root) using the candidate's label.
                    DecisionTreeNode leafNode = DecisionForestExtension.FilterTree(linkedDecisionTrees[0].NodeList[0], forest.Questions, candidate.Label);

                    // Same reasoning as above: fail with a descriptive error instead of a
                    // NullReferenceException when no leaf node comes back.
                    if (leafNode == null)
                    {
                        throw new InvalidDataException(
                            Helper.NeutralFormat("cannot find leaf node for candidate {0} in sentence {1}", candidate.Name, sentence.Id));
                    }

                    _nameIndexedCandidateGroup[leafNode.Name].Candidates.Add(candidate);
                }
            }

            // Verify there is no empty candidate group.
            foreach (CandidateGroup candidateGroup in _nameIndexedCandidateGroup.Values)
            {
                if (candidateGroup.Candidates.Count <= 0)
                {
                    throw new InvalidDataException(
                        Helper.NeutralFormat("There is no candidate in candidate group \"{0}\"", candidateGroup.Name));
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Writes the pre-selection data out as text: the decision forest to one file and the
        /// candidate groups, ordered by their id, to another.
        /// </summary>
        /// <param name="forestFile">Path of the file the decision forest is saved to.</param>
        /// <param name="candidateGroupFile">Path of the file the candidate groups are written to.</param>
        public void SaveAsText(string forestFile, string candidateGroupFile)
        {
            _decisionForest.Save(forestFile);

            using (StreamWriter writer = new StreamWriter(candidateGroupFile, false, Encoding.ASCII))
            {
                // Slot every group at the index given by its id so that the output follows id order.
                CandidateGroup[] groupsById = new CandidateGroup[_nameIndexedCandidateGroup.Count];
                foreach (CandidateGroup candidateGroup in _nameIndexedCandidateGroup.Values)
                {
                    groupsById[candidateGroup.Id] = candidateGroup;
                }

                for (int index = 0; index < groupsById.Length; ++index)
                {
                    groupsById[index].Save(writer);
                }
            }
        }
 /// <summary>
 /// Creates a PreselectionNodeData instance around the supplied candidate group.
 /// </summary>
 /// <param name="candidateGroup">The candidate group this instance keeps a reference to.</param>
 public PreselectionNodeData(CandidateGroup candidateGroup)
 {
     this._candidateGroup = candidateGroup;
 }