Example #1
0
		/// <summary>
		/// Builds an event reader over the given data reader that yields events of the requested type,
		/// using the given head rules.
		/// </summary>
		/// <param name="dataReader">
		/// Source of Penn Treebank style parses, one parse per line.
		/// </param>
		/// <param name="rules">
		/// The head rules.
		/// </param>
		/// <param name="eventType">
		/// Which kind of events to produce (tag, chunk, build, or check).
		/// </param>
		public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
		{
			// Only the context generator that matches the requested event type is created.
			switch (eventType)
			{
				case EventType.Build:
					mBuildContextGenerator = new BuildContextGenerator();
					break;

				case EventType.Check:
					mCheckContextGenerator = new CheckContextGenerator();
					break;

				case EventType.Chunk:
					mChunkContextGenerator = new ChunkContextGenerator();
					break;

				case EventType.Tag:
					mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
					break;
			}

			mHeadRules = rules;
			mEventType = eventType;
			mDataReader = dataReader;
			mEventIndex = 0;

			// With nothing to read, start from an empty event array; otherwise let AddNewEvents run.
			if (!dataReader.HasNext())
			{
				mEvents = new SharpEntropy.TrainingEvent[0];
			}
			else
			{
				AddNewEvents();
			}
		}
Example #2
0
 /// <summary>
 /// Builds an event reader over the given data reader that yields events of the requested type,
 /// using the given head rules.
 /// </summary>
 /// <param name="dataReader">
 /// Source of Penn Treebank style parses, one parse per line.
 /// </param>
 /// <param name="rules">
 /// The head rules.
 /// </param>
 /// <param name="eventType">
 /// Which kind of events to produce (tag, chunk, build, or check).
 /// </param>
 public ParserEventReader(SharpEntropy.ITrainingDataReader <string> dataReader, IHeadRules rules, EventType eventType)
 {
     // Plain state first; none of it depends on the context generator chosen below.
     mHeadRules  = rules;
     mEventType  = eventType;
     mDataReader = dataReader;
     mEventIndex = 0;

     // Only the context generator that matches the requested event type is created.
     switch (eventType)
     {
         case EventType.Build:
             mBuildContextGenerator = new BuildContextGenerator();
             break;

         case EventType.Check:
             mCheckContextGenerator = new CheckContextGenerator();
             break;

         case EventType.Chunk:
             mChunkContextGenerator = new ChunkContextGenerator();
             break;

         case EventType.Tag:
             mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
             break;
     }

     // With nothing to read, start from an empty event array; otherwise let AddNewEvents run.
     if (!dataReader.HasNext())
     {
         mEvents = new SharpEntropy.TrainingEvent[0];
     }
     else
     {
         AddNewEvents();
     }
 }
Example #3
0
        /// <summary>
        /// Initializes the event stream: forwards <paramref name="samples"/> to the base constructor,
        /// stores the head rules, dictionary and event type, and creates the context generator
        /// used for chunk or tag events.
        /// </summary>
        /// <param name="samples">The parse samples handed to the base constructor.</param>
        /// <param name="headRules">The head rules; their punctuation tags are stored as well.</param>
        /// <param name="type">The event type; only chunk and tag create a context generator here.</param>
        /// <param name="dictionary">The dictionary stored on this stream.</param>
        protected AbstractParserEventStream(
            IObjectStream <Parse> samples,
            IHeadRules headRules,
            ParserEventTypeEnum type,
            Dictionary.Dictionary dictionary)
            : base(samples)
        {
            Rules = headRules;
            Punctuation = headRules.PunctuationTags;
            Dictionary = dictionary;
            FixPossesives = false;

            Type = type;

            // Warning 618 (use of an obsolete member) is suppressed for the generator construction below.
#pragma warning disable 618
            if (type == ParserEventTypeEnum.Chunk)
            {
                chunkerContextGenerator = new ChunkContextGenerator();
            }
            else if (type == ParserEventTypeEnum.Tag)
            {
                posContextGenerator = new DefaultPOSContextGenerator(null);
            }
#pragma warning restore 618
        }
Example #4
0
 /// <summary>
 /// Initializes the event stream without a dictionary by delegating to the
 /// four-argument constructor with a null dictionary.
 /// </summary>
 /// <param name="samples">The parse samples, passed through unchanged.</param>
 /// <param name="headRules">The head rules, passed through unchanged.</param>
 /// <param name="type">The event type, passed through unchanged.</param>
 protected AbstractParserEventStream(IObjectStream <Parse> samples, IHeadRules headRules, ParserEventTypeEnum type)
     : this(samples, headRules, type, null)
 {
 }
Example #5
0
        ///<summary>
        ///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        ///<exception cref="System.ArgumentNullException">
        ///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
        ///</exception>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
            if (buildModel == null)
            {
                throw new System.ArgumentNullException("buildModel");
            }
            if (checkModel == null)
            {
                throw new System.ArgumentNullException("checkModel");
            }

            posTagger       = tagger;
            basalChunker    = chunker;
            this.buildModel = buildModel;
            this.checkModel = checkModel;
            m = beamSize;
            k = beamSize;
            q = advancePercentage;

            buildContextGenerator = new BuildContextGenerator();
            checkContextGenerator = new CheckContextGenerator();
            this.headRules        = headRules;

            // Map every start/continue outcome of the build model to the text that follows its prefix.
            // The prefix test is ordinal so it always agrees with the fixed-length Substring offset.
            startTypeMap    = new Dictionary <string, string>();
            continueTypeMap = new Dictionary <string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
                {
                    startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
                {
                    continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }

            // Cache the outcome indices that are looked up by name.
            topStartIndex   = buildModel.GetOutcomeIndex(MTopStart);
            completeIndex   = checkModel.GetOutcomeIndex(CompleteOutcome);
            incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
Example #6
0
 /// <summary>
 /// Appends the specified daughter to this parse's parts, preceded by the daughter's
 /// previous punctuation set when it has one, then extends the span to the daughter's
 /// end and recomputes the head from the head rules.
 /// </summary>
 /// <param name="daughter">The parse to append.</param>
 /// <param name="rules">The head rules used to recompute the head.</param>
 /// <exception cref="System.InvalidOperationException">The head is null.</exception>
 public void Add(Parse daughter, IHeadRules rules)
 {
     var leadingPunctuation = daughter.PreviousPunctuationSet;
     if (leadingPunctuation != null)
     {
         parts.AddRange(leadingPunctuation);
     }

     parts.Add(daughter);

     // The span keeps its start and now ends where the daughter ends.
     Span = new Span(Span.Start, daughter.Span.End);

     Head = rules.GetHead(Children, Type);
     if (Head == null)
     {
         throw new InvalidOperationException("The head is null.");
     }

     HeadIndex = Head.HeadIndex;
 }
Example #7
0
 /// <summary>
 /// Recomputes the head of this parse and, recursively, of all of its parts,
 /// storing the result in each parse.
 /// </summary>
 /// <param name="rules">
 /// The head rules used to pick the head among a parse's parts.
 /// </param>
 public virtual void UpdateHeads(IHeadRules rules)
 {
     // A parse without parts is its own head.
     if (_parts == null || _parts.Count == 0)
     {
         Head = this;
         return;
     }

     // Update every part before the head of this parse is chosen.
     for (int index = 0, total = _parts.Count; index < total; index++)
     {
         _parts[index].UpdateHeads(rules);
     }

     // Fall back to this parse when the rules return no head.
     Head = rules.GetHead(_parts.ToArray(), Type) ?? this;
 }
Example #8
0
        /// <summary>
        /// Sister adjoins this node's last child and the specified sister node and returns their
        /// new parent node. The new parent node replaces this node's last child.
        /// </summary>
        /// <param name="sister">The node to be adjoined.</param>
        /// <param name="rules">The head rules for the parser.</param>
        /// <returns>The new parent node of this node's last child and the specified sister node.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The head rules return no head for this node after the adjunction.
        /// </exception>
        public Parse AdJoin(Parse sister, IHeadRules rules)
        {
            var lastChild = parts[parts.Count - 1];

            // The new node spans from the last child's start to the sister's end and takes the last child's type.
            var adjNode = new Parse(Text, new Span(lastChild.Span.Start, sister.Span.End), lastChild.Type, 1, rules.GetHead(new[] { lastChild, sister }, lastChild.Type));

            adjNode.parts.Add(lastChild);
            if (sister.PreviousPunctuationSet != null)
            {
                adjNode.parts.AddRange(sister.PreviousPunctuationSet);
            }
            adjNode.parts.Add(sister);

            // Swap the last child for the new node and stretch this node's span over the sister.
            parts[parts.Count - 1] = adjNode;
            Span = new Span(Span.Start, sister.Span.End);

            Head = rules.GetHead(Children, Type);
            if (Head == null)
            {
                // Same failure mode as Add: report the missing head instead of a NullReferenceException.
                throw new System.InvalidOperationException("The head is null.");
            }
            HeadIndex = Head.HeadIndex;
            return adjNode;
        }
 /// <summary>
 /// Recomputes the head of this parse and, recursively, of all of its parts,
 /// storing the result in each parse.
 /// </summary>
 /// <param name="rules">
 /// The head rules used to pick the head among a parse's parts.
 /// </param>
 public virtual void UpdateHeads(IHeadRules rules)
 {
     // A parse without parts is its own head.
     if (mParts == null || mParts.Count == 0)
     {
         mHead = this;
         return;
     }

     // Update every part before the head of this parse is chosen.
     for (int index = 0, total = mParts.Count; index < total; index++)
     {
         mParts[index].UpdateHeads(rules);
     }

     // Fall back to this parse when the rules return no head.
     mHead = rules.GetHead(mParts.ToArray(), mType) ?? this;
 }
Example #10
0
        ///<summary>
        ///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        ///<exception cref="System.ArgumentNullException">
        ///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
        ///</exception>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
            if (buildModel == null)
            {
                throw new System.ArgumentNullException("buildModel");
            }
            if (checkModel == null)
            {
                throw new System.ArgumentNullException("checkModel");
            }

            mPosTagger    = tagger;
            mBasalChunker = chunker;
            mBuildModel   = buildModel;
            mCheckModel   = checkModel;
            M             = beamSize;
            K             = beamSize;
            Q             = advancePercentage;

            // One probability slot per outcome of each model.
            mBuildProbabilities    = new double[mBuildModel.OutcomeCount];
            mCheckProbabilities    = new double[mCheckModel.OutcomeCount];
            mBuildContextGenerator = new BuildContextGenerator();
            mCheckContextGenerator = new CheckContextGenerator();
            mHeadRules             = headRules;
            mOldDerivationsHeap    = new Util.TreeSet <Parse>();
            mNewDerivationsHeap    = new Util.TreeSet <Parse>();
            mParses = new Util.TreeSet <Parse>();

            // Map every start/continue outcome of the build model to the text that follows its prefix.
            // The prefix test is ordinal so it always agrees with the fixed-length Substring offset.
            mStartTypeMap    = new Dictionary <string, string>();
            mContinueTypeMap = new Dictionary <string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
                {
                    mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
                {
                    mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }

            // Cache the outcome indices that are looked up by name.
            mTopStartIndex   = buildModel.GetOutcomeIndex(MTopStart);
            mCompleteIndex   = checkModel.GetOutcomeIndex(CompleteOutcome);
            mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
Example #11
0
 /// <summary>
 /// Recomputes the head of this parse and, recursively, of all of its parts,
 /// storing the result in each parse.
 /// </summary>
 /// <param name="rules">The head rules used to pick the head among a parse's parts.</param>
 public void UpdateHeads(IHeadRules rules)
 {
     // A parse without parts is its own head.
     if (parts == null || parts.Count == 0)
     {
         Head = this;
         return;
     }

     // Update every part before the head of this parse is chosen.
     foreach (var part in parts)
     {
         part.UpdateHeads(rules);
     }

     Head = rules.GetHead(parts.ToArray(), Type);
     if (Head != null)
     {
         // The head index is only refreshed when the rules produced a head.
         HeadIndex = Head.HeadIndex;
     }
     else
     {
         Head = this;
     }
 }
Example #12
0
        // Constructors -------------------------

        ///<summary>
        ///Creates a new parser from the specified models and head rules, passing
        ///DefaultBeamSize and DefaultAdvancePercentage to the seven-argument constructor.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        public MaximumEntropyParser(
            SharpEntropy.IMaximumEntropyModel buildModel,
            SharpEntropy.IMaximumEntropyModel checkModel,
            IParserTagger tagger,
            IParserChunker chunker,
            IHeadRules headRules)
            : this(buildModel, checkModel, tagger, chunker, headRules, DefaultBeamSize, DefaultAdvancePercentage)
        {
        }
Example #13
0
 /// <summary>
 /// Recomputes the head of this parse and, recursively, of all of its parts,
 /// storing the result in each parse.
 /// </summary>
 /// <param name="rules">
 /// The head rules used to pick the head among a parse's parts.
 /// </param>
 public virtual void UpdateHeads(IHeadRules rules)
 {
     // A parse without parts is its own head.
     if (mParts == null || mParts.Count == 0)
     {
         mHead = this;
         return;
     }

     // Update every part before the head of this parse is chosen.
     foreach (Parse part in mParts)
     {
         part.UpdateHeads(rules);
     }

     // Fall back to this parse when the rules return no head.
     mHead = rules.GetHead(mParts.ToArray(), mType) ?? this;
 }
        ///<summary>
        ///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        ///<exception cref="System.ArgumentNullException">
        ///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
        ///</exception>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            // Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
            if (buildModel == null)
            {
                throw new System.ArgumentNullException("buildModel");
            }
            if (checkModel == null)
            {
                throw new System.ArgumentNullException("checkModel");
            }

            mPosTagger = tagger;
            mBasalChunker = chunker;
            mBuildModel = buildModel;
            mCheckModel = checkModel;
            M = beamSize;
            K = beamSize;
            Q = advancePercentage;

            // One probability slot per outcome of each model.
            mBuildProbabilities = new double[mBuildModel.OutcomeCount];
            mCheckProbabilities = new double[mCheckModel.OutcomeCount];
            mBuildContextGenerator = new BuildContextGenerator();
            mCheckContextGenerator = new CheckContextGenerator();
            mHeadRules = headRules;
            mOldDerivationsHeap = new Util.TreeSet<Parse>();
            mNewDerivationsHeap = new Util.TreeSet<Parse>();
            mParses = new Util.TreeSet<Parse>();

            // Map every start/continue outcome of the build model to the text that follows its prefix.
            // The prefix test is ordinal so it always agrees with the fixed-length Substring offset.
            mStartTypeMap = new Dictionary<string, string>();
            mContinueTypeMap = new Dictionary<string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
                {
                    mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
                {
                    mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }

            // Cache the outcome indices that are looked up by name.
            mTopStartIndex = buildModel.GetOutcomeIndex(mTopStart);
            mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
            mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
Example #15
0
		///<summary>
		///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
		///</summary>
		///<param name="buildModel">
		///The model to assign constituent labels.
		///</param>
		///<param name="checkModel">
		///The model to determine a constituent is complete.
		///</param>
		///<param name="tagger">
		///The model to assign pos-tags.
		///</param>
		///<param name="chunker">
		///The model to assign flat constituent labels.
		///</param>
		///<param name="headRules">
		///The head rules for head word percolation.
		///</param>
		///<param name="beamSize">
		///The number of different parses kept during parsing.
		///</param>
		///<param name="advancePercentage">
		///The minimal amount of probability mass which advanced outcomes must represent.
		///Only outcomes which contribute to the top "advancePercentage" will be explored.
		///</param>
		///<exception cref="System.ArgumentNullException">
		///<paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
		///</exception>
		public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
		{
			// Both models are dereferenced below; fail with a descriptive exception instead of a NullReferenceException.
			if (buildModel == null)
			{
				throw new System.ArgumentNullException("buildModel");
			}
			if (checkModel == null)
			{
				throw new System.ArgumentNullException("checkModel");
			}

			posTagger = tagger;
			basalChunker = chunker;
			this.buildModel = buildModel;
			this.checkModel = checkModel;
			m = beamSize;
			k = beamSize;
			q = advancePercentage;

			buildContextGenerator = new BuildContextGenerator();
			checkContextGenerator = new CheckContextGenerator();
			this.headRules = headRules;

			// Map every start/continue outcome of the build model to the text that follows its prefix.
			// The prefix test is ordinal so it always agrees with the fixed-length Substring offset.
			startTypeMap = new Dictionary<string, string>();
			continueTypeMap = new Dictionary<string, string>();
			for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
			{
				string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
				if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
				{
					startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
				}
				else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
				{
					continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
				}
			}

			// Cache the outcome indices that are looked up by name.
			topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
			completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
			incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
		}
Example #16
0
        // Constructors -------------------------

	    ///<summary>
		///Creates a new parser from the specified models and head rules, passing
		///DefaultBeamSize and DefaultAdvancePercentage to the seven-argument constructor.
		///</summary>
		///<param name="buildModel">
		///The model to assign constituent labels.
		///</param>
		///<param name="checkModel">
		///The model to determine a constituent is complete.
		///</param>
		///<param name="tagger">
		///The model to assign pos-tags.
		///</param>
		///<param name="chunker">
		///The model to assign flat constituent labels.
		///</param>
		///<param name="headRules">
		///The head rules for head word percolation.
		///</param>
		public MaximumEntropyParser(
			SharpEntropy.IMaximumEntropyModel buildModel,
			SharpEntropy.IMaximumEntropyModel checkModel,
			IParserTagger tagger,
			IParserChunker chunker,
			IHeadRules headRules)
			: this(buildModel, checkModel, tagger, chunker, headRules, DefaultBeamSize, DefaultAdvancePercentage)
		{
		}
Example #17
0
		/// <summary>
		/// Recomputes the head of this parse and, recursively, of all of its parts,
		/// storing the result in each parse.
		/// </summary>
		/// <param name="rules">
		/// The head rules used to pick the head among a parse's parts.
		/// </param>
		public virtual void UpdateHeads(IHeadRules rules)
		{
			// A parse without parts is its own head.
			if (_parts == null || _parts.Count == 0)
			{
				Head = this;
				return;
			}

			// Update every part before the head of this parse is chosen.
			foreach (Parse part in _parts)
			{
				part.UpdateHeads(rules);
			}

			// Fall back to this parse when the rules return no head.
			Head = rules.GetHead(_parts.ToArray(), Type) ?? this;
		}