/// <summary>
/// Initializes an event reader that draws parses from the given data reader and produces
/// events of the requested type with the help of the given head rules.
/// </summary>
/// <param name="dataReader">
/// Source of training data: one Penn Treebank style parse per line.
/// </param>
/// <param name="rules">
/// The head rules.
/// </param>
/// <param name="eventType">
/// The kind of events wanted (tag, chunk, build, or check).
/// </param>
public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
{
    // Only the context generator that matches the requested event type is created.
    switch (eventType)
    {
        case EventType.Build:
            mBuildContextGenerator = new BuildContextGenerator();
            break;
        case EventType.Check:
            mCheckContextGenerator = new CheckContextGenerator();
            break;
        case EventType.Chunk:
            mChunkContextGenerator = new ChunkContextGenerator();
            break;
        case EventType.Tag:
            mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
            break;
    }

    mDataReader = dataReader;
    mHeadRules = rules;
    mEventType = eventType;
    mEventIndex = 0;

    // With nothing to read, start from an empty event array.
    if (!dataReader.HasNext())
    {
        mEvents = new SharpEntropy.TrainingEvent[0];
        return;
    }

    AddNewEvents();
}
/// <summary>
/// Builds an event reader over the supplied data reader, generating events of the supplied
/// type and using the supplied head rules.
/// </summary>
/// <param name="dataReader">
/// Reader yielding one Penn Treebank style parse per line.
/// </param>
/// <param name="rules">
/// The head rules.
/// </param>
/// <param name="eventType">
/// The desired event type (tag, chunk, build, or check).
/// </param>
public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
{
    // Create just the one context generator that the chosen event type needs.
    switch (eventType)
    {
        case EventType.Build:
            mBuildContextGenerator = new BuildContextGenerator();
            break;
        case EventType.Check:
            mCheckContextGenerator = new CheckContextGenerator();
            break;
        case EventType.Chunk:
            mChunkContextGenerator = new ChunkContextGenerator();
            break;
        case EventType.Tag:
            mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
            break;
    }

    mEventIndex = 0;
    mEventType = eventType;
    mHeadRules = rules;
    mDataReader = dataReader;

    bool hasData = dataReader.HasNext();
    if (hasData)
    {
        // Load the first batch of events right away.
        AddNewEvents();
    }
    else
    {
        mEvents = new SharpEntropy.TrainingEvent[0];
    }
}
/// <summary>
/// Initializes the event stream from the given samples, head rules, event type and dictionary.
/// </summary>
/// <param name="samples">The parse samples; passed on to the base constructor.</param>
/// <param name="headRules">The head rules; their punctuation tags are stored as well.</param>
/// <param name="type">The event type; decides which context generator (if any) is created here.</param>
/// <param name="dictionary">The dictionary stored on this stream.</param>
protected AbstractParserEventStream(
    IObjectStream<Parse> samples,
    IHeadRules headRules,
    ParserEventTypeEnum type,
    Dictionary.Dictionary dictionary)
    : base(samples)
{
    Rules = headRules;
    Punctuation = headRules.PunctuationTags;
    Dictionary = dictionary;
    FixPossesives = false;
    Type = type;

    // Warning CS0618 (use of an obsolete member) is suppressed while the generators are constructed.
#pragma warning disable 618
    if (type == ParserEventTypeEnum.Chunk)
    {
        chunkerContextGenerator = new ChunkContextGenerator();
    }
    else if (type == ParserEventTypeEnum.Tag)
    {
        posContextGenerator = new DefaultPOSContextGenerator(null);
    }
#pragma warning restore 618
}
/// <summary>
/// Initializes the event stream without a dictionary; forwards to the four-argument
/// constructor, passing <c>null</c> as the dictionary.
/// </summary>
/// <param name="samples">The parse samples.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="type">The event type.</param>
protected AbstractParserEventStream(IObjectStream<Parse> samples, IHeadRules headRules, ParserEventTypeEnum type)
    : this(samples, headRules, type, dictionary: null)
{
}
///<summary>
///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
///</summary>
///<param name="buildModel">
///The model to assign constituent labels.
///</param>
///<param name="checkModel">
///The model to determine a constituent is complete.
///</param>
///<param name="tagger">
///The model to assign pos-tags.
///</param>
///<param name="chunker">
///The model to assign flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
///<param name="beamSize">
///The number of different parses kept during parsing.
///</param>
///<param name="advancePercentage">
///The minimal amount of probability mass which advanced outcomes must represent.
///Only outcomes which contribute to the top "advancePercentage" will be explored.
///</param>
///<exception cref="System.ArgumentNullException">
///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
///</exception>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
    if (buildModel == null)
    {
        throw new System.ArgumentNullException("buildModel");
    }
    if (checkModel == null)
    {
        throw new System.ArgumentNullException("checkModel");
    }

    posTagger = tagger;
    basalChunker = chunker;
    this.buildModel = buildModel;
    this.checkModel = checkModel;

    // m and k are both initialised from the beam size.
    m = beamSize;
    k = beamSize;
    q = advancePercentage;

    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();
    this.headRules = headRules;

    startTypeMap = new Dictionary<string, string>();
    continueTypeMap = new Dictionary<string, string>();

    // Map each start/continue outcome of the build model to the text following its prefix.
    // Outcome names are identifiers, so the prefix test is ordinal - consistent with the
    // ordinal Substring(prefix.Length) used to strip the prefix.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
        {
            startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
        {
            continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }

    // Cache the indices of the outcomes that are looked up by name.
    topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
    completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
/// <summary>
/// Appends the given daughter (preceded by its previous punctuation, if it has any) to this
/// parse, extends this parse's span to the daughter's end and recomputes the head.
/// </summary>
/// <param name="daughter">The daughter to append.</param>
/// <param name="rules">The head rules used to recompute the head.</param>
/// <exception cref="System.InvalidOperationException">The head is null.</exception>
public void Add(Parse daughter, IHeadRules rules)
{
    // Punctuation preceding the daughter goes in before the daughter itself.
    if (daughter.PreviousPunctuationSet != null)
    {
        parts.AddRange(daughter.PreviousPunctuationSet);
    }
    parts.Add(daughter);

    // Keep our start, take over the daughter's end.
    Span = new Span(Span.Start, daughter.Span.End);

    Head = rules.GetHead(Children, Type);
    if (Head != null)
    {
        HeadIndex = Head.HeadIndex;
        return;
    }

    throw new InvalidOperationException("The head is null.");
}
/// <summary>
/// Recomputes the head of this parse and, recursively, of all its sub-parses, storing the
/// result in the parse data structure.
/// </summary>
/// <param name="rules">
/// The head rules that decide how a parse's head is computed.
/// </param>
public virtual void UpdateHeads(IHeadRules rules)
{
    // A parse without parts is its own head.
    if (_parts == null || _parts.Count == 0)
    {
        Head = this;
        return;
    }

    // Sub-parses are updated first, in order.
    int total = _parts.Count;
    for (int index = 0; index < total; index++)
    {
        _parts[index].UpdateHeads(rules);
    }

    // Fall back to this parse when the rules yield no head.
    Head = rules.GetHead(_parts.ToArray(), Type) ?? this;
}
/// <summary>
/// Sister-adjoins this node's last child and the specified sister node and returns their
/// new parent node. The new parent node replaces this node's last child.
/// </summary>
/// <param name="sister">The node to be adjoined.</param>
/// <param name="rules">The head rules for the parser.</param>
/// <returns>The new parent node of this node's last child and the specified sister node.</returns>
/// <exception cref="System.InvalidOperationException">The head is null.</exception>
public Parse AdJoin(Parse sister, IHeadRules rules)
{
    var lastChild = parts[parts.Count - 1];

    // The new parent spans from the last child's start to the sister's end and takes the
    // last child's type; its head is computed from the two nodes it joins.
    var adjNode = new Parse(
        Text,
        new Span(lastChild.Span.Start, sister.Span.End),
        lastChild.Type,
        1,
        rules.GetHead(new[] { lastChild, sister }, lastChild.Type));

    adjNode.parts.Add(lastChild);
    if (sister.PreviousPunctuationSet != null)
    {
        // Punctuation preceding the sister goes between the last child and the sister.
        adjNode.parts.AddRange(sister.PreviousPunctuationSet);
    }
    adjNode.parts.Add(sister);

    // Swap the last child for the new parent and extend this node to the sister's end.
    parts[parts.Count - 1] = adjNode;
    Span = new Span(Span.Start, sister.Span.End);

    Head = rules.GetHead(Children, Type);
    if (Head == null)
    {
        // Same failure mode and message as Add, instead of a NullReferenceException below.
        throw new System.InvalidOperationException("The head is null.");
    }
    HeadIndex = Head.HeadIndex;

    return adjNode;
}
/// <summary>
/// Recomputes the head of this parse and, recursively, of all its sub-parses, storing the
/// result in the parse data structure.
/// </summary>
/// <param name="rules">
/// The head rules that decide how a parse's head is computed.
/// </param>
public virtual void UpdateHeads(IHeadRules rules)
{
    // A parse without parts is its own head.
    if (mParts == null || mParts.Count == 0)
    {
        mHead = this;
        return;
    }

    // Sub-parses are updated first, in order.
    int total = mParts.Count;
    for (int index = 0; index < total; index++)
    {
        Parse child = mParts[index];
        child.UpdateHeads(rules);
    }

    // Fall back to this parse when the rules yield no head.
    mHead = rules.GetHead(mParts.ToArray(), mType) ?? this;
}
///<summary>
///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
///</summary>
///<param name="buildModel">
///The model to assign constituent labels.
///</param>
///<param name="checkModel">
///The model to determine a constituent is complete.
///</param>
///<param name="tagger">
///The model to assign pos-tags.
///</param>
///<param name="chunker">
///The model to assign flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
///<param name="beamSize">
///The number of different parses kept during parsing.
///</param>
///<param name="advancePercentage">
///The minimal amount of probability mass which advanced outcomes must represent.
///Only outcomes which contribute to the top "advancePercentage" will be explored.
///</param>
///<exception cref="System.ArgumentNullException">
///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
///</exception>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
    if (buildModel == null)
    {
        throw new System.ArgumentNullException("buildModel");
    }
    if (checkModel == null)
    {
        throw new System.ArgumentNullException("checkModel");
    }

    mPosTagger = tagger;
    mBasalChunker = chunker;
    mBuildModel = buildModel;
    mCheckModel = checkModel;

    // M and K are both initialised from the beam size.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;

    // One probability slot per outcome of each model.
    mBuildProbabilities = new double[mBuildModel.OutcomeCount];
    mCheckProbabilities = new double[mCheckModel.OutcomeCount];

    mBuildContextGenerator = new BuildContextGenerator();
    mCheckContextGenerator = new CheckContextGenerator();
    mHeadRules = headRules;

    mOldDerivationsHeap = new Util.TreeSet<Parse>();
    mNewDerivationsHeap = new Util.TreeSet<Parse>();
    mParses = new Util.TreeSet<Parse>();

    mStartTypeMap = new Dictionary<string, string>();
    mContinueTypeMap = new Dictionary<string, string>();

    // Map each start/continue outcome of the build model to the text following its prefix.
    // Outcome names are identifiers, so the prefix test is ordinal - consistent with the
    // ordinal Substring(prefix.Length) used to strip the prefix.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
        {
            mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
        {
            mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }

    // Cache the indices of the outcomes that are looked up by name.
    mTopStartIndex = buildModel.GetOutcomeIndex(MTopStart);
    mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
/// <summary>
/// Recomputes the head of this parse and, recursively, of all its sub-parses, storing the
/// result in the parse data structure.
/// </summary>
/// <param name="rules">The head rules that decide how a parse's head is computed.</param>
public void UpdateHeads(IHeadRules rules)
{
    // A parse without parts is its own head.
    if (parts == null || parts.Count == 0)
    {
        Head = this;
        return;
    }

    // Sub-parses are updated first, in order.
    int total = parts.Count;
    for (int index = 0; index < total; index++)
    {
        Parse child = parts[index];
        child.UpdateHeads(rules);
    }

    Head = rules.GetHead(parts.ToArray(), Type);
    if (Head != null)
    {
        // The head index is only taken over when the rules produced a head.
        HeadIndex = Head.HeadIndex;
    }
    else
    {
        Head = this;
    }
}
// Constructors -------------------------

///<summary>
///Creates a new parser from the given models and head rules, using the default beam size
///and the default advance percentage.
///</summary>
///<param name="buildModel">
///The model that assigns constituent labels.
///</param>
///<param name="checkModel">
///The model that determines whether a constituent is complete.
///</param>
///<param name="tagger">
///The model that assigns pos-tags.
///</param>
///<param name="chunker">
///The model that assigns flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
public MaximumEntropyParser(
    SharpEntropy.IMaximumEntropyModel buildModel,
    SharpEntropy.IMaximumEntropyModel checkModel,
    IParserTagger tagger,
    IParserChunker chunker,
    IHeadRules headRules)
    : this(
        buildModel,
        checkModel,
        tagger,
        chunker,
        headRules,
        beamSize: DefaultBeamSize,
        advancePercentage: DefaultAdvancePercentage)
{
}
///<summary>
///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
///</summary>
///<param name="buildModel">
///The model to assign constituent labels.
///</param>
///<param name="checkModel">
///The model to determine a constituent is complete.
///</param>
///<param name="tagger">
///The model to assign pos-tags.
///</param>
///<param name="chunker">
///The model to assign flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
///<param name="beamSize">
///The number of different parses kept during parsing.
///</param>
///<param name="advancePercentage">
///The minimal amount of probability mass which advanced outcomes must represent.
///Only outcomes which contribute to the top "advancePercentage" will be explored.
///</param>
///<exception cref="System.ArgumentNullException">
///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
///</exception>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
    if (buildModel == null)
    {
        throw new System.ArgumentNullException("buildModel");
    }
    if (checkModel == null)
    {
        throw new System.ArgumentNullException("checkModel");
    }

    mPosTagger = tagger;
    mBasalChunker = chunker;
    mBuildModel = buildModel;
    mCheckModel = checkModel;

    // M and K are both initialised from the beam size.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;

    // One probability slot per outcome of each model.
    mBuildProbabilities = new double[mBuildModel.OutcomeCount];
    mCheckProbabilities = new double[mCheckModel.OutcomeCount];

    mBuildContextGenerator = new BuildContextGenerator();
    mCheckContextGenerator = new CheckContextGenerator();
    mHeadRules = headRules;

    mOldDerivationsHeap = new Util.TreeSet<Parse>();
    mNewDerivationsHeap = new Util.TreeSet<Parse>();
    mParses = new Util.TreeSet<Parse>();

    mStartTypeMap = new Dictionary<string, string>();
    mContinueTypeMap = new Dictionary<string, string>();

    // Map each start/continue outcome of the build model to the text following its prefix.
    // Outcome names are identifiers, so the prefix test is ordinal - consistent with the
    // ordinal Substring(prefix.Length) used to strip the prefix.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
        {
            mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
        {
            mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }

    // Cache the indices of the outcomes that are looked up by name.
    mTopStartIndex = buildModel.GetOutcomeIndex(mTopStart);
    mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
///<summary>
///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
///</summary>
///<param name="buildModel">
///The model to assign constituent labels.
///</param>
///<param name="checkModel">
///The model to determine a constituent is complete.
///</param>
///<param name="tagger">
///The model to assign pos-tags.
///</param>
///<param name="chunker">
///The model to assign flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
///<param name="beamSize">
///The number of different parses kept during parsing.
///</param>
///<param name="advancePercentage">
///The minimal amount of probability mass which advanced outcomes must represent.
///Only outcomes which contribute to the top "advancePercentage" will be explored.
///</param>
///<exception cref="System.ArgumentNullException">
///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
///</exception>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
    if (buildModel == null)
    {
        throw new System.ArgumentNullException("buildModel");
    }
    if (checkModel == null)
    {
        throw new System.ArgumentNullException("checkModel");
    }

    posTagger = tagger;
    basalChunker = chunker;
    this.buildModel = buildModel;
    this.checkModel = checkModel;

    // m and k are both initialised from the beam size.
    m = beamSize;
    k = beamSize;
    q = advancePercentage;

    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();
    this.headRules = headRules;

    startTypeMap = new Dictionary<string, string>();
    continueTypeMap = new Dictionary<string, string>();

    // Map each start/continue outcome of the build model to the text following its prefix.
    // Outcome names are identifiers, so the prefix test is ordinal - consistent with the
    // ordinal Substring(prefix.Length) used to strip the prefix.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
        {
            startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
        {
            continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }

    // Cache the indices of the outcomes that are looked up by name.
    topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
    completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
// Constructors -------------------------

///<summary>
///Creates a new parser from the given models and head rules, using the default beam size
///and the default advance percentage.
///</summary>
///<param name="buildModel">
///The model that assigns constituent labels.
///</param>
///<param name="checkModel">
///The model that determines whether a constituent is complete.
///</param>
///<param name="tagger">
///The model that assigns pos-tags.
///</param>
///<param name="chunker">
///The model that assigns flat constituent labels.
///</param>
///<param name="headRules">
///The head rules for head word percolation.
///</param>
public MaximumEntropyParser(
    SharpEntropy.IMaximumEntropyModel buildModel,
    SharpEntropy.IMaximumEntropyModel checkModel,
    IParserTagger tagger,
    IParserChunker chunker,
    IHeadRules headRules)
    : this(
        buildModel,
        checkModel,
        tagger,
        chunker,
        headRules,
        beamSize: DefaultBeamSize,
        advancePercentage: DefaultAdvancePercentage)
{
}