Example #1
        ///<summary>
        ///Creates a new parser using the specified models and head rules, with the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model used to determine whether a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            posTagger       = tagger;
            basalChunker    = chunker;
            this.buildModel = buildModel;
            this.checkModel = checkModel;
            m = beamSize;
            k = beamSize;
            q = advancePercentage;

            buildContextGenerator = new BuildContextGenerator();
            checkContextGenerator = new CheckContextGenerator();
            this.headRules        = headRules;

            startTypeMap    = new Dictionary <string, string>();
            continueTypeMap = new Dictionary <string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix))
                {
                    //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
                    startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix))
                {
                    //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
                    continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }
            topStartIndex   = buildModel.GetOutcomeIndex(MTopStart);
            completeIndex   = checkModel.GetOutcomeIndex(CompleteOutcome);
            incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
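A minimal construction sketch for this parser, assuming the build and check models are loaded from hypothetical GIS model files with BinaryGisModelReader (the same reader the SimilarityModel and GenderModel examples further down use); the tagger, chunker and head-rule placeholders stand in for whatever implementations the project supplies:

    // Hypothetical model file names; only the constructor signature above is taken from this page.
    SharpEntropy.IMaximumEntropyModel buildModel =
        new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader("parser/build.nbin"));
    SharpEntropy.IMaximumEntropyModel checkModel =
        new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader("parser/check.nbin"));

    IParserTagger tagger   = null;  // e.g. a MaximumEntropyPosTagger wrapper trained for the parser
    IParserChunker chunker = null;  // e.g. a MaximumEntropyChunker wrapper trained for the parser
    IHeadRules headRules   = null;  // head-rule table used for head-word percolation

    // 20 and 0.95 are commonly used beam-size / advance-percentage values; Example #33 shows the
    // overload that supplies the defaults for you.
    var parser = new MaximumEntropyParser(buildModel, checkModel, tagger, chunker, headRules, 20, 0.95);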
 public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
 {
     mBeamSize         = beamSize;
     mPosModel         = model;
     mContextGenerator = contextGenerator;
     Beam        = new PosBeamSearch(this, mBeamSize, contextGenerator, model);
     mDictionary = dictionary;
 }
Example #3
 /// <summary>
 /// Class constructor which takes the maximum entropy model that the tokenizer will use.
 /// </summary>
 public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
 {
     mContextGenerator         = new TokenContextGenerator();
     mAlphaNumericOptimization = false;
     mModel              = model;
     mNewTokens          = new List <Util.Span>();
     mTokenProbabilities = new List <double>(50);
 }
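A usage sketch for the tokenizer; the model file name is hypothetical and the Tokenize call is an assumption about the tokenizer's public API — only the constructor is taken from the example above:

    var tokenModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader("EnglishTok.nbin"));   // hypothetical file name
    var tokenizer = new MaximumEntropyTokenizer(tokenModel);

    // Assumed Tokenize(string) overload returning the token strings.
    var tokens = tokenizer.Tokenize("Mr. Smith bought cheapsite.com for 1.5 million dollars.");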
Example #4
 public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
 {
     UseClosedClassTagsFilter = false;
     this.BeamSize            = beamSize;
     this.PosModel            = model;
     this.ContextGenerator    = contextGenerator;
     Beam = new PosBeamSearch(this, this.BeamSize, contextGenerator, model);
     this.TagDictionary = dictionary;
 }
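A hedged construction sketch for the tagger: the model and tag-dictionary file names are hypothetical, the path-based PosLookupList constructor and the Tag(string[]) call are assumptions, and DefaultPosContextGenerator is the generator used by the single-model overload in Example #38:

    var posModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader("EnglishPOS.nbin"));   // hypothetical file name
    var dictionary = new PosLookupList("tagdict");                      // assumed path-based constructor
    var tagger = new MaximumEntropyPosTagger(3, posModel, new DefaultPosContextGenerator(), dictionary);

    // Assumed Tag(string[]) overload; beam size 3 keeps the three best tag sequences during search.
    var tags = tagger.Tag(new[] { "The", "dog", "barks", "." });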
Example #5
        /// <summary>
        /// Creates a new search object.
        /// </summary>
        /// <param name="size">
        /// The size of the beam (k).
        /// </param>
        /// <param name="contextGenerator">
        /// the context generator for the model.
        /// </param>
        /// <param name="model">
        /// the model for assigning probabilities to the sequence outcomes.
        /// </param>
        /// <param name="cacheSize">
        /// size of the cache to use for performance.
        /// </param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
        {
            Size             = size;
            ContextGenerator = contextGenerator;
            Model            = model;

            mProbabilities = new double[model.OutcomeCount];
            if (cacheSize > 0)
            {
                mContextsCache = new Cache(cacheSize);
            }
        }
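A sketch of how the beam search is driven, assuming a model-specific IBeamSearchContextGenerator implementation; the BestSequence call is the same one the LocalEvaluate examples further down use (Beam.BestSequence(words, null).Outcomes), though the exact parameter type for the token sequence differs between versions of the class:

    IBeamSearchContextGenerator contextGenerator = null;  // the generator the model was trained with
    SharpEntropy.IMaximumEntropyModel model = null;       // e.g. a GisModel loaded from disk
    var search = new BeamSearch(5, contextGenerator, model, 100);   // beam of 5, arbitrary cache size

    string[] tokens = { "The", "dog", "barks", "." };
    var best = search.BestSequence(tokens, null);   // best.Outcomes holds the predicted label sequence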
Example #6
        /// <summary>Creates a new search object</summary>
        /// <param name="size">The size of the beam (k)</param>
        /// <param name="contextGenerator">the context generator for the model</param>
        /// <param name="model">the model for assigning probabilities to the sequence outcomes</param>
        /// <param name="cacheSize">size of the cache to use for performance</param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
        {
            Size = size;
            ContextGenerator = contextGenerator;
            Model = model;

            _probabilities = new double[model.OutcomeCount];
            if (cacheSize > 0)
            {
                _contextsCache = new Cache(cacheSize);
            }
        }
Example #7
 private SimilarityModel(string modelName, bool train)
 {
     ModelName = modelName;
     if (train)
     {
         _events = new List <SharpEntropy.TrainingEvent>();
     }
     else
     {
         _testModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + ModelExtension));
         _sameIndex = _testModel.GetOutcomeIndex(Same);
     }
 }
Example #8
 private SimilarityModel(string modelName, bool train)
 {
     mModelName = modelName;
     if (train)
     {
         mEvents = new List<SharpEntropy.TrainingEvent>();
     }
     else
     {
         mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
         mSameIndex = mTestModel.GetOutcomeIndex(mSame);
     }
 }
Example #9
        private NumberModel(string modelName, bool train)
        {
            mModelName = modelName;
            if (train)
            {
                mEvents = new List <SharpEntropy.TrainingEvent>();
            }
            else
            {
                mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));

                mSingularIndex = mTestModel.GetOutcomeIndex(NumberEnum.Singular.ToString());
                mPluralIndex   = mTestModel.GetOutcomeIndex(NumberEnum.Plural.ToString());
            }
        }
Example #10
        private NumberModel(string modelName, bool train)
        {
            mModelName = modelName;
            if (train)
            {
                mEvents = new List<SharpEntropy.TrainingEvent>();
            }
            else
            {
                mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));

                mSingularIndex = mTestModel.GetOutcomeIndex(NumberEnum.Singular.ToString());
                mPluralIndex = mTestModel.GetOutcomeIndex(NumberEnum.Plural.ToString());
            }
        }
Example #11
        /// <summary>Creates a new search object</summary>
        /// <param name="size">The size of the beam (k)</param>
        /// <param name="contextGenerator">the context generator for the model</param>
        /// <param name="model">the model for assigning probabilities to the sequence outcomes</param>
        /// <param name="cacheSizeInMegaBytes">size of the cache to use for performance</param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator,
                          SharpEntropy.IMaximumEntropyModel model, int cacheSizeInMegaBytes)
        {
            Size             = size;
            ContextGenerator = contextGenerator;
            Model            = model;

            if (cacheSizeInMegaBytes > 0)
            {
                var properties = new NameValueCollection
                {
                    { "cacheMemoryLimitMegabytes", cacheSizeInMegaBytes.ToString() }
                };
                contextsCache = new MemoryCache("beamSearchContextCache", properties);
            }
        }
Example #12
        /// <summary>Creates a new search object</summary>
        /// <param name="size">The size of the beam (k)</param>
        /// <param name="contextGenerator">the context generator for the model</param>
        /// <param name="model">the model for assigning probabilities to the sequence outcomes</param>
        /// <param name="cacheSizeInMegaBytes">size of the cache to use for performance</param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator,
            SharpEntropy.IMaximumEntropyModel model, int cacheSizeInMegaBytes)
        {
            Size = size;
            ContextGenerator = contextGenerator;
            Model = model;

            if (cacheSizeInMegaBytes > 0)
            {
                var properties = new NameValueCollection
                {
                    {"cacheMemoryLimitMegabytes", cacheSizeInMegaBytes.ToString()}
                };
                contextsCache = new MemoryCache("beamSearchContextCache", properties);
            }
        }
Example #13
        private GenderModel(string modelName, bool train)
        {
            mModelName   = modelName;
            mMaleNames   = ReadNames(modelName + ".mal");
            mFemaleNames = ReadNames(modelName + ".fem");
            if (train)
            {
                mEvents = new List <SharpEntropy.TrainingEvent>();
            }
            else
            {
                mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));

                mMaleIndex   = mTestModel.GetOutcomeIndex(GenderEnum.Male.ToString());
                mFemaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Female.ToString());
                mNeuterIndex = mTestModel.GetOutcomeIndex(GenderEnum.Neuter.ToString());
            }
        }
Example #14
 private GenderModel(string modelName, bool train)
 {
     mModelName = modelName;
     mMaleNames = ReadNames(modelName + ".mal");
     mFemaleNames = ReadNames(modelName + ".fem");
     if (train)
     {
         mEvents = new List<SharpEntropy.TrainingEvent>();
     }
     else
     {
         mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
         
         mMaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Male.ToString());
         mFemaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Female.ToString());
         mNeuterIndex = mTestModel.GetOutcomeIndex(GenderEnum.Neuter.ToString());
     }
 }
 public DefaultNonReferentialResolver(string projectName, string name, ResolverMode mode)
 {
     mResolverMode = mode;
     mModelName    = projectName + "\\" + name + "_nr";
     if (mode == ResolverMode.Train)
     {
         mEvents = new List <SharpEntropy.TrainingEvent>();
     }
     else if (mode == ResolverMode.Test)
     {
         mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
         mNonReferentialIndex = mModel.GetOutcomeIndex(MaximumEntropyResolver.Same);
     }
     else
     {
         throw new ArgumentException("unexpected mode " + mode);
     }
 }
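Construction only, with hypothetical project and model names; ResolverMode.Train collects training events in memory, while ResolverMode.Test expects a serialized GIS model named "<name>_nr" plus the model extension on disk:

    // "coref" and "sometype" are hypothetical; Test mode reads "coref\sometype_nr" + extension from disk.
    var trainingResolver = new DefaultNonReferentialResolver("coref", "sometype", ResolverMode.Train);
    var testingResolver  = new DefaultNonReferentialResolver("coref", "sometype", ResolverMode.Test);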
        public DefaultNonReferentialResolver(string projectName, string name, ResolverMode mode)
        {
            mResolverMode = mode;
            mModelName = projectName + Path.DirectorySeparatorChar + name + "_nr";
            if (mode == ResolverMode.Train)
            {
                mEvents = new List<SharpEntropy.TrainingEvent>();
            }
            else if (mode == ResolverMode.Test)
            {

                mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
                mNonReferentialIndex = mModel.GetOutcomeIndex(MaximumEntropyResolver.Same);
            }
            else
            {
                throw new ArgumentException("unexpected mode " + mode);
            }
        }
Example #17
        public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, StreamReader reader, out double accuracy, out double sentenceAccuracy)
        {
            this.PosModel = posModel;
            float total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;

            var    sentenceReader = new StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
            string line;

            while ((object)(line = sentenceReader.ReadLine()) != null)
            {
                sentences++;
                var annotatedPair = PosEventReader.ConvertAnnotatedString(line);
                var words         = annotatedPair.Item1;
                var outcomes      = annotatedPair.Item2;
                var tags          = new ArrayList(Beam.BestSequence(words.ToArray(), null).Outcomes);

                int  count        = 0;
                bool isSentenceOk = true;
                for (IEnumerator tagIndex = tags.GetEnumerator(); tagIndex.MoveNext(); count++)
                {
                    total++;
                    var tag = (string)tagIndex.Current;
                    if (tag == (string)outcomes[count])
                    {
                        correct++;
                    }
                    else
                    {
                        isSentenceOk = false;
                    }
                }
                if (isSentenceOk)
                {
                    sentencesCorrect++;
                }
            }

            accuracy         = correct / total;
            sentenceAccuracy = sentencesCorrect / sentences;
        }
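A usage sketch for the evaluation method above; the corpus file name is hypothetical, and each line is assumed to be an annotated sentence in the format PosEventReader.ConvertAnnotatedString expects:

    // tagger and posModel constructed as in the tagger examples above.
    double accuracy, sentenceAccuracy;
    using (var reader = new StreamReader("annotated_corpus.txt"))
    {
        tagger.LocalEvaluate(posModel, reader, out accuracy, out sentenceAccuracy);
    }
    Console.WriteLine("word accuracy: {0:P2}, sentence accuracy: {1:P2}", accuracy, sentenceAccuracy);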
        public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
        {
            mPosModel = posModel;
            float total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;

            System.IO.StreamReader sentenceReader = new System.IO.StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
            string line;

            while ((object)(line = sentenceReader.ReadLine()) != null)
            {
                sentences++;
                Util.Pair <ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
                ArrayList words    = annotatedPair.FirstValue;
                ArrayList outcomes = annotatedPair.SecondValue;
                ArrayList tags     = new ArrayList(Beam.BestSequence(words, null).Outcomes);

                int  count        = 0;
                bool isSentenceOK = true;
                for (System.Collections.IEnumerator tagIndex = tags.GetEnumerator(); tagIndex.MoveNext(); count++)
                {
                    total++;
                    string tag = (string)tagIndex.Current;
                    if (tag == (string)outcomes[count])
                    {
                        correct++;
                    }
                    else
                    {
                        isSentenceOK = false;
                    }
                }
                if (isSentenceOK)
                {
                    sentencesCorrect++;
                }
            }

            accuracy         = correct / total;
            sentenceAccuracy = sentencesCorrect / sentences;
        }
Example #19
        ///<summary>
        ///Creates a new parser using the specified models and head rules, with the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model used to determine whether a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            mPosTagger    = tagger;
            mBasalChunker = chunker;
            mBuildModel   = buildModel;
            mCheckModel   = checkModel;
            M             = beamSize;
            K             = beamSize;
            Q             = advancePercentage;

            mBuildProbabilities    = new double[mBuildModel.OutcomeCount];
            mCheckProbabilities    = new double[mCheckModel.OutcomeCount];
            mBuildContextGenerator = new BuildContextGenerator();
            mCheckContextGenerator = new CheckContextGenerator();
            mHeadRules             = headRules;
            mOldDerivationsHeap    = new Util.TreeSet <Parse>();
            mNewDerivationsHeap    = new Util.TreeSet <Parse>();
            mParses = new Util.TreeSet <Parse>();

            mStartTypeMap    = new Dictionary <string, string>();
            mContinueTypeMap = new Dictionary <string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix))
                {
                    //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
                    mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix))
                {
                    //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
                    mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }
            mTopStartIndex   = buildModel.GetOutcomeIndex(MTopStart);
            mCompleteIndex   = checkModel.GetOutcomeIndex(CompleteOutcome);
            mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }
Example #20
 /// <summary>
 /// Creates a maximum-entropy-based resolver with the specified model name, using the
 /// specified mode, which will look the specified number of entities back for a referent and
 /// prefer the first referent if specified.
 /// </summary>
 /// <param name="modelDirectory">
 /// The name of the directory where the resolver models are stored.
 /// </param>
 /// <param name="name">
 /// The name of the file where this model will be read or written.
 /// </param>
 /// <param name="mode">
 /// The mode this resolver is being used in (training or testing).
 /// </param>
 /// <param name="numberOfEntitiesBack">
 /// The number of entities back in the text that this resolver will look
 /// for a referent.
 /// </param>
 /// <param name="preferFirstReferent">
 /// Set to true if the resolver should prefer the first referent which is more
 /// likely than non-reference. This only affects testing.
 /// </param>
 /// <param name="nonReferentialResolver">
 /// Determines how likely it is that this entity is non-referential.
 /// </param>
 protected MaximumEntropyResolver(string modelDirectory, string name, ResolverMode mode, int numberOfEntitiesBack, bool preferFirstReferent, INonReferentialResolver nonReferentialResolver) : base(numberOfEntitiesBack)
 {
     mPreferFirstReferent    = preferFirstReferent;
     mNonReferentialResolver = nonReferentialResolver;
     mResolverMode           = mode;
     mModelName = modelDirectory + "/" + name;
     if (mResolverMode == ResolverMode.Test)
     {
         mModel     = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
         mSameIndex = mModel.GetOutcomeIndex(Same);
     }
     else if (mResolverMode == ResolverMode.Train)
     {
         mEvents = new List <SharpEntropy.TrainingEvent>();
     }
     else
     {
         System.Console.Error.WriteLine("Unknown mode: " + mResolverMode);
     }
     //add one for non-referent possibility
     mCandidateProbabilities = new double[GetNumberEntitiesBack() + 1];
 }
 /// <summary> 
 /// Creates a new <code>MaximumEntropySentenceDetector</code> instance.
 /// </summary>
 /// <param name="model">
 /// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
 /// evaluate end-of-sentence decisions.
 /// </param>
 /// <param name="contextGenerator">The IContextGenerator object which this MaximumEntropySentenceDetector
 /// will use to turn strings into contexts for the model to
 /// evaluate.
 /// </param>
 /// <param name="scanner">the EndOfSentenceScanner which this MaximumEntropySentenceDetector
 /// will use to locate end of sentence indexes.
 /// </param>
 public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
 {
     mModel = model;
     mContextGenerator = contextGenerator;
     mScanner = scanner;
 }
 public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, PosLookupList dictionary) : this(mDefaultBeamSize, model, new DefaultPosContextGenerator(), dictionary)
 {
 }
Example #23
 public PosBeamSearch(MaximumEntropyPosTagger posTagger, int size, IPosContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize) :
     base(size, contextGenerator, model, cacheSize)
 {
     _maxentPosTagger = posTagger;
 }
 /// <summary>
 /// Class constructor which takes the maximum entropy model that the tokenizer will use.
 /// </summary>
 public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
 {
     mContextGenerator = new TokenContextGenerator();
     mAlphaNumericOptimization = false;
     mModel = model;
     mNewTokens = new List<Util.Span>();
     mTokenProbabilities = new List<double>(50);
 }
Example #25
 public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator) :
     this(DefaultBeamSize, model, contextGenerator, null)
 {
 }
Example #26
		/// <summary>
		/// Constructor which takes an IMaximumEntropyModel and an IContextGenerator.
		/// Calls the three-argument constructor with a default end-of-sentence scanner.
		/// </summary>
		/// <param name="model">
		/// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
		/// evaluate end-of-sentence decisions.
		/// </param>
		/// <param name="contextGenerator">
		/// The IContextGenerator object which this MaximumEntropySentenceDetector
		/// will use to turn strings into contexts for the model to
		/// evaluate.
		/// </param>
        public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
            : this(model, contextGenerator, new DefaultEndOfSentenceScanner())
		{
		}
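A hedged usage sketch: the sentence-detection model file name is hypothetical, the context generator placeholder stands in for the generator the model was trained with, and the SentenceDetect(string) call is an assumption about the detector's public API:

    var sentenceModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader("EnglishSD.nbin"));    // hypothetical file name
    SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator = null;
    var detector = new MaximumEntropySentenceDetector(sentenceModel, contextGenerator);

    // Assumed SentenceDetect(string) method splitting the input into sentence strings.
    var sentences = detector.SentenceDetect("First sentence. Second sentence.");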
Example #27
 /// <summary>
 /// Creates a chunker using the specified model and context generator and decodes the
 /// model using a beam search of the specified size.
 /// </summary>
 /// <param name="model">
 /// The maximum entropy model for this chunker.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator to be used by the specified model.
 /// </param>
 /// <param name="beamSize">
 /// The size of the beam that should be used when decoding sequences.
 /// </param>
 public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator, int beamSize)
 {
     mBeam  = new ChunkBeamSearch(this, beamSize, contextGenerator, model);
     mModel = model;
 }
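A usage sketch for the chunker; the model file name is hypothetical and the Chunk call mirrors the OpenNLP chunker API (tokens plus their POS tags), which is an assumption here — only the constructor comes from this page:

    var chunkModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader("EnglishChunk.nbin"));  // hypothetical file name
    var chunker = new MaximumEntropyChunker(chunkModel, new DefaultChunkerContextGenerator(), 10);

    // Example tokens and their POS tags; the Chunk signature is assumed, not taken from this page.
    string[] tokens = { "He", "reckons", "the", "deficit", "will", "narrow" };
    string[] tags   = { "PRP", "VBZ", "DT", "NN", "MD", "VB" };
    var chunkLabels = chunker.Chunk(tokens, tags);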
Example #28
		///<summary>
		///Creates a new parser using the specified models and head rules, with the specified beam size and advance percentage.
		///</summary>
		///<param name="buildModel">
		///The model to assign constituent labels.
		///</param>
		///<param name="checkModel">
		///The model used to determine whether a constituent is complete.
		///</param>
		///<param name="tagger">
		///The model to assign pos-tags.
		///</param>
		///<param name="chunker">
		///The model to assign flat constituent labels.
		///</param>
		///<param name="headRules">
		///The head rules for head word percolation.
		///</param>
		///<param name="beamSize">
		///The number of different parses kept during parsing.
		///</param>
		///<param name="advancePercentage">
		///The minimal amount of probability mass which advanced outcomes must represent.
		///Only outcomes which contribute to the top "advancePercentage" will be explored.
		///</param>    
		public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage) 
		{
			posTagger = tagger;
			basalChunker = chunker;
			this.buildModel = buildModel;
			this.checkModel = checkModel;
			m = beamSize;
			k = beamSize;
			q = advancePercentage;

			buildContextGenerator = new BuildContextGenerator();
			checkContextGenerator = new CheckContextGenerator();
			this.headRules = headRules;
			
			startTypeMap = new Dictionary<string, string>();
            continueTypeMap = new Dictionary<string, string>();
			for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++) 
			{
				string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
				if (outcome.StartsWith(StartPrefix)) 
				{
					//System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
					startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
				}
				else if (outcome.StartsWith(ContinuePrefix)) 
				{
					//System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
					continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
				}
			}
			topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
			completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
			incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
		}
Example #29
 /// <summary>
 /// Creates a new name finder with the specified model and context generator.
 /// </summary>
 /// <param name="model">
 /// The model to be used to find names.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator to be used with this name finder.
 /// </param>
 /// <param name="beamSize">
 /// The size of the beam to be used in decoding this model.
 /// </param>
 public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator, int beamSize)
 {
     mModel            = model;
     mContextGenerator = contextGenerator;
     mBeam             = new NameBeamSearch(this, beamSize, contextGenerator, model, beamSize);
 }
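A hedged usage sketch for the name finder; the model file name is hypothetical, DefaultNameContextGenerator(10) is the generator the single-argument overload in Example #32 uses, and the Find call (tokens plus a map of previously assigned labels) is an assumption about the finder's API:

    var nameModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader("person.nbin"));        // hypothetical file name
    var finder = new MaximumEntropyNameFinder(nameModel, new DefaultNameContextGenerator(10), 10);

    // Assumed Find overload, mirroring the OpenNLP name finder API; the empty map means no labels
    // are carried over from a previous sentence.
    var tokens = new System.Collections.ArrayList { "Pierre", "Vinken", "joined", "the", "board" };
    var names  = finder.Find(tokens, new System.Collections.Hashtable());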
Example #30
		/// <summary>
		/// Creates a chunker using the specified model and context generator and decodes the
		/// model using a beam search of the specified size.
		/// </summary>
		/// <param name="model">The maximum entropy model for this chunker</param>
		/// <param name="contextGenerator">The context generator to be used by the specified model</param>
		/// <param name="beamSize">The size of the beam that should be used when decoding sequences</param>
		public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator, int beamSize)
		{
			Beam = new ChunkBeamSearch(this, beamSize, contextGenerator, model);
			Model = model;
		}
Example #31
 /// <summary>
 /// Creates a new name finder with the specified model and context generator.
 /// </summary>
 /// <param name="model">
 /// The model to be used to find names.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator to be used with this name finder.
 /// </param>
 public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator) :
     this(model, contextGenerator, 10)
 {
 }
Example #32
        // Constructors ----------------------------------------------

        /// <summary>
        /// Creates a new name finder with the specified model.
        /// </summary>
        /// <param name="model">
        /// The model to be used to find names.
        /// </param>
        public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model) :
            this(model, new DefaultNameContextGenerator(10), 10)
        {
        }
Example #33
        // Constructors -------------------------

        ///<summary>
        ///Creates a new parser using the specified models and head rules.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model used to determine whether a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules) : this(buildModel, checkModel, tagger, chunker, headRules, DefaultBeamSize, DefaultAdvancePercentage)
        {
        }
 /// <summary>
 /// Creates a maximum-entropy-based resolver with the specified model name, using the 
 /// specified mode, which will look the specified number of entities back for a referent and
 /// prefer the first referent if specified.
 /// </summary>
 /// <param name="modelDirectory">
 /// The name of the directory where the resolver models are stored.
 /// </param>
 /// <param name="name">
 /// The name of the file where this model will be read or written.
 /// </param>
 /// <param name="mode">
 /// The mode this resolver is being used in (training or testing).
 /// </param>
 /// <param name="numberOfEntitiesBack">
 /// The number of entities back in the text that this resolver will look
 /// for a referent.
 /// </param>
 /// <param name="preferFirstReferent">
 /// Set to true if the resolver should prefer the first referent which is more
 /// likely than non-reference. This only affects testing.
 /// </param>
 /// <param name="nonReferentialResolver">
 /// Determines how likely it is that this entity is non-referential.
 /// </param>
 protected MaximumEntropyResolver(string modelDirectory, string name, ResolverMode mode, int numberOfEntitiesBack, bool preferFirstReferent, INonReferentialResolver nonReferentialResolver)
     : base(numberOfEntitiesBack)
 {
     mPreferFirstReferent = preferFirstReferent;
     mNonReferentialResolver = nonReferentialResolver;
     mResolverMode = mode;
     mModelName = modelDirectory + "/" + name;
     if (mResolverMode == ResolverMode.Test)
     {
         mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
         mSameIndex = mModel.GetOutcomeIndex(Same);
     }
     else if (mResolverMode == ResolverMode.Train)
     {
         mEvents = new List<SharpEntropy.TrainingEvent>();
     }
     else
     {
         System.Console.Error.WriteLine("Unknown mode: " + mResolverMode);
     }
     //add one for non-referent possibility
     mCandidateProbabilities = new double[GetNumberEntitiesBack() + 1];
 }
Example #35
 public ChunkBeamSearch(MaximumEntropyChunker maxentChunker, int size, IChunkerContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model) : base(size, contextGenerator, model)
 {
     mMaxentChunker = maxentChunker;
 }
 /// <summary>
 /// Creates a new name finder with the specified model and context generator.
 /// </summary>
 /// <param name="model">
 /// The model to be used to find names.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator to be used with this name finder.
 /// </param>
 /// <param name="beamSize">
 /// The size of the beam to be used in decoding this model.
 /// </param>
 public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator, int beamSize)
 {
     mModel = model;
     mContextGenerator = contextGenerator;
     mBeam = new NameBeamSearch(this, beamSize, contextGenerator, model, beamSize);
 }
Example #37
 /// <summary>
 /// Creates a chunker using the specified model.
 /// </summary>
 /// <param name="model">
 /// The maximum entropy model for this chunker.
 /// </param>
 public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model) : this(model, new DefaultChunkerContextGenerator(), 10)
 {
 }
Example #38
        // Constructors ------------------------

        public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model) :
            this(model, new DefaultPosContextGenerator())
        {
        }
Example #39
 /// <summary>
 /// Creates a chunker using the specified model and context generator.
 /// </summary>
 /// <param name="model">
 /// The maximum entropy model for this chunker.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator to be used by the specified model.
 /// </param>
 public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator) : this(model, contextGenerator, 10)
 {
 }
Example #40
 public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary) :
     this(DefaultBeamSize, model, contextGenerator, dictionary)
 {
 }
Example #41
        // Constructors -----------------

        /// <summary>Creates a new search object</summary>
        /// <param name="size">The size of the beam (k)</param>
        /// <param name="contextGenerator">the context generator for the model</param>
        /// <param name="model">the model for assigning probabilities to the sequence outcomes</param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator,
                          SharpEntropy.IMaximumEntropyModel model) :
            this(size, contextGenerator, model, 0)
        {
        }
        public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
        {
            mPosModel = posModel;
            float total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;

            System.IO.StreamReader sentenceReader = new System.IO.StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
            string line;

            while ((object) (line = sentenceReader.ReadLine()) != null)
            {
                sentences++;
                Util.Pair<ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
                ArrayList words = annotatedPair.FirstValue;
                ArrayList outcomes = annotatedPair.SecondValue;
                ArrayList tags = new ArrayList(Beam.BestSequence(words, null).Outcomes);

                int count = 0;
                bool isSentenceOK = true;
                for (System.Collections.IEnumerator tagIndex = tags.GetEnumerator(); tagIndex.MoveNext(); count++)
                {
                    total++;
                    string tag = (string) tagIndex.Current;
                    if (tag == (string)outcomes[count])
                    {
                        correct++;
                    }
                    else
                    {
                        isSentenceOK = false;
                    }
                }
                if (isSentenceOK)
                {
                    sentencesCorrect++;
                }
            }

            accuracy = correct / total;
            sentenceAccuracy = sentencesCorrect / sentences;
        }
 public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
 {
     mBeamSize = beamSize;
     mPosModel = model;
     mContextGenerator = contextGenerator;
     Beam = new PosBeamSearch(this, mBeamSize, contextGenerator, model);
     mDictionary = dictionary;
 }
Example #44
 /// <summary>
 /// Creates a beam search of the specified size using the specified model with the specified context generator.
 /// </summary>
 /// <param name="nameFinder">
 /// The associated MaximumEntropyNameFinder instance.
 /// </param>
 /// <param name="size">
 /// The size of the beam.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator used with the specified model.
 /// </param>
 /// <param name="model">
 /// The model used to determine names.
 /// </param>
 /// <param name="beamSize">
 /// The size of the beam to use in searching.
 /// </param>
 public NameBeamSearch(MaximumEntropyNameFinder nameFinder, int size, INameContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int beamSize) :
     base(size, contextGenerator, model, beamSize)
 {
     _nameFinder = nameFinder;
 }
        ///<summary>
        ///Creates a new parser using the specified models and head rules, with the specified beam size and advance percentage.
        ///</summary>
        ///<param name="buildModel">
        ///The model to assign constituent labels.
        ///</param>
        ///<param name="checkModel">
        ///The model used to determine whether a constituent is complete.
        ///</param>
        ///<param name="tagger">
        ///The model to assign pos-tags.
        ///</param>
        ///<param name="chunker">
        ///The model to assign flat constituent labels.
        ///</param>
        ///<param name="headRules">
        ///The head rules for head word percolation.
        ///</param>
        ///<param name="beamSize">
        ///The number of different parses kept during parsing.
        ///</param>
        ///<param name="advancePercentage">
        ///The minimal amount of probability mass which advanced outcomes must represent.
        ///Only outcomes which contribute to the top "advancePercentage" will be explored.
        ///</param>    
        public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
        {
            mPosTagger = tagger;
            mBasalChunker = chunker;
            mBuildModel = buildModel;
            mCheckModel = checkModel;
            M = beamSize;
            K = beamSize;
            Q = advancePercentage;

            mBuildProbabilities = new double[mBuildModel.OutcomeCount];
            mCheckProbabilities = new double[mCheckModel.OutcomeCount];
            mBuildContextGenerator = new BuildContextGenerator();
            mCheckContextGenerator = new CheckContextGenerator();
            mHeadRules = headRules;
            mOldDerivationsHeap = new Util.TreeSet<Parse>();
            mNewDerivationsHeap = new Util.TreeSet<Parse>();
            mParses = new Util.TreeSet<Parse>();

            mStartTypeMap = new Dictionary<string, string>();
            mContinueTypeMap = new Dictionary<string, string>();
            for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
            {
                string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
                if (outcome.StartsWith(StartPrefix))
                {
                    //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
                    mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
                }
                else if (outcome.StartsWith(ContinuePrefix))
                {
                    //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
                    mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
                }
            }
            mTopStartIndex = buildModel.GetOutcomeIndex(mTopStart);
            mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
            mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
        }