Example #1
0
        ///<summary>
        ///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        ///<exception cref="System.ArgumentNullException">
        ///Thrown when <paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
        ///</exception>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            // Both models are dereferenced below; fail fast with a descriptive exception
            // rather than a NullReferenceException part-way through construction.
            if (buildModel == null)
            {
                throw new System.ArgumentNullException("buildModel");
            }
            if (checkModel == null)
            {
                throw new System.ArgumentNullException("checkModel");
            }

            posTagger       = tagger;
            basalChunker    = chunker;
            this.buildModel = buildModel;
            this.checkModel = checkModel;
            // m and k are both initialized from the single beamSize argument.
            m = beamSize;
            k = beamSize;
            q = advancePercentage;

            buildContextGenerator = new BuildContextGenerator();
            checkContextGenerator = new CheckContextGenerator();
            this.headRules        = headRules;

            // Map every "start" / "continue" build outcome to the remainder of its name
            // after the prefix. Outcome names are identifiers, so the prefix test is
            // ordinal rather than culture-sensitive.
            startTypeMap    = new Dictionary <string, string>();
            continueTypeMap = new Dictionary <string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
                {
                    startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
                {
                    continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }

            // Cache the indices of the outcomes that are looked up by name.
            topStartIndex   = buildModel.GetOutcomeIndex(MTopStart);
            completeIndex   = checkModel.GetOutcomeIndex(CompleteOutcome);
            incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
Example #2
0
 /// <summary>
 /// Builds an event reader over the given data reader that produces events of the requested type, using the given head rules.
 /// </summary>
 /// <param name="dataReader">
 /// A 1-parse-per-line Penn Treebank Style parse.
 /// </param>
 /// <param name="rules">
 /// The head rules.
 /// </param>
 /// <param name="eventType">
 /// The type of events desired (tag, chunk, build, or check).
 /// </param>
 public ParserEventReader(SharpEntropy.ITrainingDataReader <string> dataReader, IHeadRules rules, EventType eventType)
 {
     // Only the context generator that matches the requested event type is created.
     switch (eventType)
     {
         case EventType.Build:
             mBuildContextGenerator = new BuildContextGenerator();
             break;
         case EventType.Check:
             mCheckContextGenerator = new CheckContextGenerator();
             break;
         case EventType.Chunk:
             mChunkContextGenerator = new ChunkContextGenerator();
             break;
         case EventType.Tag:
             mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
             break;
     }

     mHeadRules  = rules;
     mEventType  = eventType;
     mDataReader = dataReader;
     mEventIndex = 0;

     // With nothing to read, start from an empty event array.
     if (!dataReader.HasNext())
     {
         mEvents = new SharpEntropy.TrainingEvent[0];
         return;
     }

     AddNewEvents();
 }
Example #3
0
        ///<summary>
        ///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        ///<exception cref="System.ArgumentNullException">
        ///Thrown when <paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
        ///</exception>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            // Both models are dereferenced below; fail fast with a descriptive exception
            // rather than a NullReferenceException part-way through construction.
            if (buildModel == null)
            {
                throw new System.ArgumentNullException("buildModel");
            }
            if (checkModel == null)
            {
                throw new System.ArgumentNullException("checkModel");
            }

            mPosTagger    = tagger;
            mBasalChunker = chunker;
            mBuildModel   = buildModel;
            mCheckModel   = checkModel;
            // M and K are both initialized from the single beamSize argument.
            M             = beamSize;
            K             = beamSize;
            Q             = advancePercentage;

            // Buffers sized to hold one probability per outcome of each model.
            mBuildProbabilities    = new double[mBuildModel.OutcomeCount];
            mCheckProbabilities    = new double[mCheckModel.OutcomeCount];
            mBuildContextGenerator = new BuildContextGenerator();
            mCheckContextGenerator = new CheckContextGenerator();
            mHeadRules             = headRules;
            mOldDerivationsHeap    = new Util.TreeSet <Parse>();
            mNewDerivationsHeap    = new Util.TreeSet <Parse>();
            mParses = new Util.TreeSet <Parse>();

            // Map every "start" / "continue" build outcome to the remainder of its name
            // after the prefix. Outcome names are identifiers, so the prefix test is
            // ordinal rather than culture-sensitive.
            mStartTypeMap    = new Dictionary <string, string>();
            mContinueTypeMap = new Dictionary <string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
                {
                    mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
                {
                    mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }

            // Cache the indices of the outcomes that are looked up by name.
            mTopStartIndex   = buildModel.GetOutcomeIndex(MTopStart);
            mCompleteIndex   = checkModel.GetOutcomeIndex(CompleteOutcome);
            mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
Example #4
0
		///<summary>
		///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
		///</summary>
		///<param name="buildModel">
		///The model to assign constituent labels.
		///</param>
		///<param name="checkModel">
		///The model to determine a constituent is complete.
		///</param>
		///<param name="tagger">
		///The model to assign pos-tags.
		///</param>
		///<param name="chunker">
		///The model to assign flat constituent labels.
		///</param>
		///<param name="headRules">
		///The head rules for head word percolation.
		///</param>
		///<param name="beamSize">
		///The number of different parses kept during parsing.
		///</param>
		///<param name="advancePercentage">
		///The minimal amount of probability mass which advanced outcomes must represent.
		///Only outcomes which contribute to the top "advancePercentage" will be explored.
		///</param>
		///<exception cref="System.ArgumentNullException">
		///Thrown when <paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
		///</exception>
		public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage) 
		{
			// Both models are dereferenced below; fail fast with a descriptive exception
			// rather than a NullReferenceException part-way through construction.
			if (buildModel == null)
			{
				throw new System.ArgumentNullException("buildModel");
			}
			if (checkModel == null)
			{
				throw new System.ArgumentNullException("checkModel");
			}

			posTagger = tagger;
			basalChunker = chunker;
			this.buildModel = buildModel;
			this.checkModel = checkModel;
			// m and k are both initialized from the single beamSize argument.
			m = beamSize;
			k = beamSize;
			q = advancePercentage;

			buildContextGenerator = new BuildContextGenerator();
			checkContextGenerator = new CheckContextGenerator();
			this.headRules = headRules;

			// Map every "start" / "continue" build outcome to the remainder of its name
			// after the prefix. Outcome names are identifiers, so the prefix test is
			// ordinal rather than culture-sensitive.
			startTypeMap = new Dictionary<string, string>();
			continueTypeMap = new Dictionary<string, string>();
			for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++) 
			{
				string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
				if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal)) 
				{
					startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
				}
				else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal)) 
				{
					continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
				}
			}

			// Cache the indices of the outcomes that are looked up by name.
			topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
			completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
			incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
		}
 /// <summary>
 /// Builds an event reader over the given data reader that produces events of the requested type, using the given head rules.
 /// </summary>
 /// <param name="dataReader">
 /// A 1-parse-per-line Penn Treebank Style parse.
 /// </param>
 /// <param name="rules">
 /// The head rules.
 /// </param>
 /// <param name="eventType">
 /// The type of events desired (tag, chunk, build, or check).
 /// </param>
 public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
 {
     // Only the context generator that matches the requested event type is created.
     switch (eventType)
     {
         case EventType.Build:
             mBuildContextGenerator = new BuildContextGenerator();
             break;
         case EventType.Check:
             mCheckContextGenerator = new CheckContextGenerator();
             break;
         case EventType.Chunk:
             mChunkContextGenerator = new ChunkContextGenerator();
             break;
         case EventType.Tag:
             mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
             break;
     }

     mHeadRules = rules;
     mEventType = eventType;
     mDataReader = dataReader;
     mEventIndex = 0;

     // With nothing to read, start from an empty event array.
     if (!dataReader.HasNext())
     {
         mEvents = new SharpEntropy.TrainingEvent[0];
         return;
     }

     AddNewEvents();
 }
        ///<summary>
        ///Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model to determine a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        ///<exception cref="System.ArgumentNullException">
        ///Thrown when <paramref name="buildModel"/> or <paramref name="checkModel"/> is null.
        ///</exception>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            // Both models are dereferenced below; fail fast with a descriptive exception
            // rather than a NullReferenceException part-way through construction.
            if (buildModel == null)
            {
                throw new System.ArgumentNullException("buildModel");
            }
            if (checkModel == null)
            {
                throw new System.ArgumentNullException("checkModel");
            }

            mPosTagger = tagger;
            mBasalChunker = chunker;
            mBuildModel = buildModel;
            mCheckModel = checkModel;
            // M and K are both initialized from the single beamSize argument.
            M = beamSize;
            K = beamSize;
            Q = advancePercentage;

            // Buffers sized to hold one probability per outcome of each model.
            mBuildProbabilities = new double[mBuildModel.OutcomeCount];
            mCheckProbabilities = new double[mCheckModel.OutcomeCount];
            mBuildContextGenerator = new BuildContextGenerator();
            mCheckContextGenerator = new CheckContextGenerator();
            mHeadRules = headRules;
            mOldDerivationsHeap = new Util.TreeSet<Parse>();
            mNewDerivationsHeap = new Util.TreeSet<Parse>();
            mParses = new Util.TreeSet<Parse>();

            // Map every "start" / "continue" build outcome to the remainder of its name
            // after the prefix. Outcome names are identifiers, so the prefix test is
            // ordinal rather than culture-sensitive.
            mStartTypeMap = new Dictionary<string, string>();
            mContinueTypeMap = new Dictionary<string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix, System.StringComparison.Ordinal))
                {
                    mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix, System.StringComparison.Ordinal))
                {
                    mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }

            // Cache the indices of the outcomes that are looked up by name.
            mTopStartIndex = buildModel.GetOutcomeIndex(mTopStart);
            mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
            mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }