/// <summary>
/// Builds an event reader that produces events of the requested type from the
/// supplied data reader, using the supplied head rules.
/// </summary>
/// <param name="dataReader">
/// A 1-parse-per-line Penn Treebank Style parse.
/// </param>
/// <param name="rules">
/// The head rules.
/// </param>
/// <param name="eventType">
/// The type of events desired (tag, chunk, build, or check).
/// </param>
public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
{
    mHeadRules = rules;
    mEventType = eventType;
    mDataReader = dataReader;
    mEventIndex = 0;

    // Only the context generator matching the requested event type is created;
    // the other generator fields are left untouched.
    switch (eventType)
    {
        case EventType.Build:
            mBuildContextGenerator = new BuildContextGenerator();
            break;
        case EventType.Check:
            mCheckContextGenerator = new CheckContextGenerator();
            break;
        case EventType.Chunk:
            mChunkContextGenerator = new ChunkContextGenerator();
            break;
        case EventType.Tag:
            mPosContextGenerator = new POS_ContextGenerator();
            break;
    }

    // With nothing to read, start with an empty event array instead of loading events.
    if (!dataReader.HasNext())
    {
        mEvents = new SharpEntropy.TrainingEvent[0];
        return;
    }

    AddNewEvents();
}
///<summary>
///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
///</summary>
///<param name="buildModel">
///The model to assign constituent labels.
///</param>
///<param name="checkModel">
///The model to determine a constituent is complete.
///</param>
///<param name="tagger">
///The model to assign pos-tags.
///</param>
///<param name="chunker">
///The model to assign flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
///<param name="beamSize">
///The number of different parses kept during parsing.
///</param>
///<param name="advancePercentage">
///The minimal amount of probability mass which advanced outcomes must represent.
///Only outcomes which contribute to the top "advancePercentage" will be explored.
///</param>
///<exception cref="System.ArgumentNullException">
///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
///</exception>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    // Both models are dereferenced below; fail with a descriptive exception
    // rather than a NullReferenceException part-way through construction.
    if (buildModel == null)
    {
        throw new System.ArgumentNullException("buildModel");
    }
    if (checkModel == null)
    {
        throw new System.ArgumentNullException("checkModel");
    }

    mPosTagger = tagger;
    mBasalChunker = chunker;
    mBuildModel = buildModel;
    mCheckModel = checkModel;

    // M and K are both initialised from the beam size; Q holds the advance percentage.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;

    // One probability slot per outcome of each model.
    mBuildProbabilities = new double[buildModel.OutcomeCount];
    mCheckProbabilities = new double[checkModel.OutcomeCount];

    mBuildContextGenerator = new BuildContextGenerator();
    mCheckContextGenerator = new CheckContextGenerator();
    mHeadRules = headRules;

    mOldDerivationsHeap = new Util.TreeSet<Parse>();
    mNewDerivationsHeap = new Util.TreeSet<Parse>();
    mParses = new Util.TreeSet<Parse>();

    mStartTypeMap = new Dictionary<string, string>();
    mContinueTypeMap = new Dictionary<string, string>();

    // Map every build outcome that carries the start or continue prefix to the
    // remainder of its name with that prefix stripped. Outcome names are model
    // labels, so the prefix test is ordinal rather than culture-sensitive.
    int buildOutcomeCount = buildModel.OutcomeCount;
    for (int buildOutcomeIndex = 0; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
        {
            mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
        {
            mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }

    // Cache the indices of the outcomes that are looked up by name here.
    mTopStartIndex = buildModel.GetOutcomeIndex(mTopStart);
    mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}