Example #1
0
		/// <summary>
		/// Initializes a parser event reader that draws its data from the specified data reader,
		/// produces events of the specified type and uses the specified head rules.
		/// </summary>
		/// <param name="dataReader">
		/// A 1-parse-per-line Penn Treebank Style parse.
		/// </param>
		/// <param name="rules">
		/// The head rules.
		/// </param>
		/// <param name="eventType">
		/// The type of events desired (tag, chunk, build, or check).
		/// </param>
		public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
		{
			// Only the context generator that matches the requested event type is created.
			switch (eventType)
			{
				case EventType.Build:
					mBuildContextGenerator = new BuildContextGenerator();
					break;
				case EventType.Check:
					mCheckContextGenerator = new CheckContextGenerator();
					break;
				case EventType.Chunk:
					mChunkContextGenerator = new ChunkContextGenerator();
					break;
				case EventType.Tag:
					mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
					break;
			}

			mDataReader = dataReader;
			mEventType = eventType;
			mHeadRules = rules;
			mEventIndex = 0;

			// Nothing to read: start with an empty event array.
			if (!dataReader.HasNext())
			{
				mEvents = new SharpEntropy.TrainingEvent[0];
				return;
			}

			AddNewEvents();
		}
		/// <summary>
		/// Initializes an event reader that reads from the specified data reader and builds
		/// its events with the specified context generator.
		/// </summary>
		/// <param name="dataReader">
		/// The data reader for this event reader.
		/// </param>
		/// <param name="contextGenerator">
		/// The context generator used when creating events for this event stream.
		/// </param>
		public NameFinderEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, INameContextGenerator contextGenerator)
		{
			mContextGenerator = contextGenerator;
			mDataReader = dataReader;
			mPreviousTags = new Dictionary<string, string>();
			mEventIndex = 0;

			// No data at all: start with an empty event array.
			if (!mDataReader.HasNext())
			{
				mEvents = new SharpEntropy.TrainingEvent[0];
				return;
			}

			// Prime the events with the first line of the data stream.
			mLine = mDataReader.NextToken();
			if (mLine.Length != 0)
			{
				AddEvents(mLine);
			}
			else
			{
				// An empty line clears the previously recorded tags.
				mPreviousTags.Clear();
			}
		}
Example #3
0
        /// <summary>
        /// Initializes a beam search with the given beam size, context generator and model,
        /// optionally backed by a context cache.
        /// </summary>
        /// <param name="size">The size of the beam (k)</param>
        /// <param name="contextGenerator">The context generator for the model</param>
        /// <param name="model">The model for assigning probabilities to the sequence outcomes</param>
        /// <param name="cacheSize">
        /// Size passed to the context cache; no cache is created when this is zero or negative
        /// </param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
        {
            Size = size;
            ContextGenerator = contextGenerator;
            Model = model;

            // One probability slot per outcome of the model.
            var outcomeCount = model.OutcomeCount;
            _probabilities = new double[outcomeCount];

            if (cacheSize <= 0)
            {
                return;
            }

            _contextsCache = new Cache(cacheSize);
        }
Example #4
0
		/// <summary>
		/// Initializes an event reader that reads from the specified data reader and builds
		/// its events with the specified context generator.
		/// </summary>
		/// <param name="dataReader">
		/// The data reader for this event reader.
		/// </param>
		/// <param name="contextGenerator">
		/// The context generator used when creating events for this event reader.
		/// </param>
		public ChunkerEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IChunkerContextGenerator contextGenerator)
		{
			mDataReader = dataReader;
			mContextGenerator = contextGenerator;
			mEventIndex = 0;

			// Nothing to read: start with an empty event array.
			if (!dataReader.HasNext())
			{
				mEvents = new SharpEntropy.TrainingEvent[0];
				return;
			}

			AddNewEvents();
		}
 /// <summary>
 /// Initializes an event reader that reads from the specified data reader and uses the
 /// specified end-of-sentence scanner and context generator.
 /// </summary>
 /// <param name="dataReader">
 /// The data reader for this event reader.
 /// </param>
 /// <param name="scanner">
 /// The end-of-sentence scanner stored for this event reader.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator stored for this event reader.
 /// </param>
 public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
 {
     mContextGenerator = contextGenerator;
     mScanner = scanner;
     mDataReader = dataReader;

     // With no data there is nothing to prime.
     if (!mDataReader.HasNext())
     {
         return;
     }

     // Read the first token and, when available, look one token ahead before creating events.
     string firstToken = mDataReader.NextToken();
     if (mDataReader.HasNext())
     {
         mNext = mDataReader.NextToken();
     }

     AddNewEvents(firstToken);
 }
Example #6
0
        /// <summary>Creates new search object</summary>
        /// <param name="size">The size of the beam (k)</param>
        /// <param name="contextGenerator">the context generator for the model</param>
        /// <param name="model">the model for assigning probabilities to the sequence outcomes</param>
        /// <param name="cacheSizeInMegaBytes">
        /// Memory limit, in megabytes, for the context cache; no cache is created when this is
        /// zero or negative
        /// </param>
        public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator,
            SharpEntropy.IMaximumEntropyModel model, int cacheSizeInMegaBytes)
        {
            Size = size;
            ContextGenerator = contextGenerator;
            Model = model;

            if (cacheSizeInMegaBytes > 0)
            {
                // The limit is a configuration value, not user-facing text, so it is
                // formatted with the invariant culture rather than the current one.
                var properties = new NameValueCollection
                {
                    {
                        "cacheMemoryLimitMegabytes",
                        cacheSizeInMegaBytes.ToString(System.Globalization.CultureInfo.InvariantCulture)
                    }
                };
                contextsCache = new MemoryCache("beamSearchContextCache", properties);
            }
        }
 /// <summary>
 /// Creates a sentence detector from a model alone. Delegates to the three-argument
 /// constructor with a SentenceDetectionContextGenerator built from the default
 /// end-of-sentence characters and a DefaultEndOfSentenceScanner.
 /// </summary>
 /// <param name="model">
 /// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
 /// evaluate end-of-sentence decisions.
 /// </param>
 public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model)
     : this(
         model,
         new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()),
         new DefaultEndOfSentenceScanner())
 {
     mUnicodeMapping = false;
     mSentenceProbs = new List<double>(50);
 }
 /// <summary>
 /// Initializes a <code>MaximumEntropySentenceDetector</code> from a model, a context
 /// generator and an end-of-sentence scanner.
 /// </summary>
 /// <param name="model">
 /// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
 /// evaluate end-of-sentence decisions.
 /// </param>
 /// <param name="contextGenerator">
 /// The IContextGenerator object which this MaximumEntropySentenceDetector
 /// will use to turn strings into contexts for the model to evaluate.
 /// </param>
 /// <param name="scanner">
 /// The IEndOfSentenceScanner which this MaximumEntropySentenceDetector
 /// will use to locate end of sentence indexes.
 /// </param>
 public MaximumEntropySentenceDetector(
     SharpEntropy.IMaximumEntropyModel model,
     SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator,
     IEndOfSentenceScanner scanner)
 {
     mScanner = scanner;
     mContextGenerator = contextGenerator;
     mModel = model;
 }
		/// <summary>
		/// Initializes a chunker from the specified model and context generator; decoding is
		/// done by a ChunkBeamSearch of the specified beam size.
		/// </summary>
		/// <param name="model">The maximum entropy model for this chunker</param>
		/// <param name="contextGenerator">The context generator to be used by the specified model</param>
		/// <param name="beamSize">The size of the beam that should be used when decoding sequences</param>
		public MaximumEntropyChunker(
			SharpEntropy.IMaximumEntropyModel model,
			IChunkerContextGenerator contextGenerator,
			int beamSize)
		{
			// The beam search is handed this chunker along with the decoding parameters.
			var beamSearch = new ChunkBeamSearch(this, beamSize, contextGenerator, model);
			Beam = beamSearch;
			Model = model;
		}
Example #10
0
        // Constructors ---------------

		/// <summary>
		/// Creates a chunker using the specified model, a DefaultChunkerContextGenerator
		/// and a beam size of 10.
		/// </summary>
		/// <param name="model">The maximum entropy model for this chunker</param>
		public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model)
			: this(model, new DefaultChunkerContextGenerator(), 10)
		{
		}
Example #11
0
		/// <summary>
		/// Trains the chunker.
		/// The training file should contain one word per line, each line being a
		/// space-delimited triple of "word pos outcome".
		/// Blank lines indicate sentence breaks.
		/// </summary>
		/// <param name="eventReader">The chunker event reader</param>
		/// <param name="iterations">The number of iterations to perform</param>
		/// <param name="cutoff">
		/// The number of times a predicate must be seen in order
		/// to be relevant for training.
		/// </param>
		/// <returns>Trained model</returns>
		public static SharpEntropy.GisModel Train(SharpEntropy.ITrainingEventReader eventReader, int iterations, int cutoff)
		{
			var gisTrainer = new SharpEntropy.GisTrainer();

			// Index the events with the cutoff, then run the requested number of iterations.
			var indexer = new SharpEntropy.TwoPassDataIndexer(eventReader, cutoff);
			gisTrainer.TrainModel(iterations, indexer);

			var trainedModel = new SharpEntropy.GisModel(gisTrainer);
			return trainedModel;
		}
Example #12
0
		/// <summary>
		/// Initializes an event reader over the specified data reader, using a
		/// DefaultNameContextGenerator to create events.
		/// </summary>
		/// <param name="dataReader">
		/// The data stream for this event reader.
		/// </param>
		public NameFinderEventReader(SharpEntropy.ITrainingDataReader<string> dataReader)
			: this(dataReader, new DefaultNameContextGenerator())
		{
		}
Example #13
0
 /// <summary>
 /// Initializes a tokenizer whose model is a GisModel built from the specified model reader.
 /// </summary>
 /// <param name="modelReader">
 /// The reader from which the GisModel is constructed.
 /// </param>
 public MaxentTokenizer(SharpEntropy.IO.IGisModelReader modelReader)
 {
     var gisModel = new GisModel(modelReader);
     mModel = gisModel;
 }
 /// <summary>
 /// Initializes a name finder from the specified model and context generator.
 /// Delegates to the three-argument constructor, passing 10 as the third argument.
 /// </summary>
 /// <param name="model">
 /// The model to be used to find names.
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator to be used with this name finder.
 /// </param>
 public MaximumEntropyNameFinder(
     SharpEntropy.IMaximumEntropyModel model,
     INameContextGenerator contextGenerator)
     : this(model, contextGenerator, 10)
 {
 }
 /// <summary>
 /// Initializes a name finder from the specified model alone.
 /// Delegates to the three-argument constructor with a DefaultNameContextGenerator
 /// constructed with 10, and 10 as the third argument.
 /// </summary>
 /// <param name="model">
 /// The model to be used to find names.
 /// </param>
 public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model)
     : this(
         model,
         new DefaultNameContextGenerator(10),
         10)
 {
 }
 /// <summary>
 /// Initializes a POS tagger from the specified model, using a DefaultPosContextGenerator.
 /// </summary>
 /// <param name="model">
 /// The maximum entropy model passed on to the two-argument constructor.
 /// </param>
 public MaximumEntropyPosTagger(
     SharpEntropy.IMaximumEntropyModel model)
     : this(model, new DefaultPosContextGenerator())
 {
 }
Example #17
0
        // Utilities ---------------

		/// <summary>
		/// Trains the chunker with 100 iterations and a cutoff of 5.
		/// The training file should contain one word per line, each line being a
		/// space-delimited triple of "word pos outcome".
		/// Blank lines indicate sentence breaks.
		/// </summary>
		/// <param name="eventReader">The chunker event reader</param>
		/// <returns>Trained model</returns>
		public static SharpEntropy.GisModel Train(SharpEntropy.ITrainingEventReader eventReader)
		{
			const int defaultIterations = 100;
			const int defaultCutoff = 5;

			SharpEntropy.GisModel trainedModel = Train(eventReader, defaultIterations, defaultCutoff);
			return trainedModel;
		}
 /// <summary>
 /// Trains a GIS model from the specified events (100 iterations, cutoff of 5) and
 /// persists it to the specified file with a BinaryGisModelWriter.
 /// </summary>
 /// <param name="eventReader">
 /// The reader supplying the training events.
 /// </param>
 /// <param name="outputFilename">
 /// The file name handed to the model writer.
 /// </param>
 public static void Train(SharpEntropy.ITrainingEventReader eventReader, string outputFilename)
 {
     const int iterations = 100;
     const int cutoff = 5;

     var gisTrainer = new SharpEntropy.GisTrainer(0.1);
     var indexer = new SharpEntropy.TwoPassDataIndexer(eventReader, cutoff);
     gisTrainer.TrainModel(iterations, indexer);

     var trainedModel = new SharpEntropy.GisModel(gisTrainer);
     var modelWriter = new SharpEntropy.IO.BinaryGisModelWriter();
     modelWriter.Persist(trainedModel, outputFilename);
 }
Example #19
0
 /// <summary>
 /// Initializes a beam search tied to the specified chunker.
 /// </summary>
 /// <param name="maxentChunker">The chunker stored by this beam search.</param>
 /// <param name="size">The beam size passed to the base constructor.</param>
 /// <param name="contextGenerator">The context generator passed to the base constructor.</param>
 /// <param name="model">The model passed to the base constructor.</param>
 public ChunkBeamSearch(
     MaximumEntropyChunker maxentChunker,
     int size,
     IChunkerContextGenerator contextGenerator,
     SharpEntropy.IMaximumEntropyModel model)
     : base(size, contextGenerator, model)
 {
     _maxentChunker = maxentChunker;
 }
 /// <summary>
 /// Initializes a tokenizer that uses the specified maximum entropy model.
 /// A new TokenContextGenerator is created, mAlphaNumericOptimization is set to false,
 /// and the token and token-probability lists start out empty.
 /// </summary>
 /// <param name="model">
 /// The model stored by this tokenizer.
 /// </param>
 public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
 {
     mModel = model;
     mContextGenerator = new TokenContextGenerator();
     mAlphaNumericOptimization = false;

     // Working lists; the probability list is pre-sized to 50 entries.
     mTokenProbabilities = new List<double>(50);
     mNewTokens = new List<Util.Span>();
 }
Example #21
0
		/// <summary>
		/// Initializes a chunker from the specified model and context generator,
		/// with a beam size of 10.
		/// </summary>
		/// <param name="model">The maximum entropy model for this chunker</param>
		/// <param name="contextGenerator">The context generator to be used by the specified model</param>
		public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator)
			: this(model, contextGenerator, 10)
		{
		}
 /// <summary>
 /// Initializes a POS tagger from the specified model and context generator, using
 /// mDefaultBeamSize as the beam size and no dictionary (null).
 /// </summary>
 /// <param name="model">The maximum entropy model for this tagger.</param>
 /// <param name="contextGenerator">The context generator for this tagger.</param>
 public MaximumEntropyPosTagger(
     SharpEntropy.IMaximumEntropyModel model,
     IPosContextGenerator contextGenerator)
     : this(mDefaultBeamSize, model, contextGenerator, null)
 {
 }
 /// <summary>
 /// Initializes a sentence detector from an IMaximumEntropyModel and an IContextGenerator.
 /// Delegates to the three-argument constructor with a DefaultEndOfSentenceScanner.
 /// </summary>
 /// <param name="model">
 /// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
 /// evaluate end-of-sentence decisions.
 /// </param>
 /// <param name="contextGenerator">
 /// The IContextGenerator object which this MaximumEntropySentenceDetector
 /// will use to turn strings into contexts for the model to evaluate.
 /// </param>
 public MaximumEntropySentenceDetector(
     SharpEntropy.IMaximumEntropyModel model,
     SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
     : this(model, contextGenerator, new DefaultEndOfSentenceScanner())
 {
 }
 /// <summary>
 /// Initializes a POS tagger from the specified model, context generator and dictionary,
 /// using mDefaultBeamSize as the beam size.
 /// </summary>
 /// <param name="model">The maximum entropy model for this tagger.</param>
 /// <param name="contextGenerator">The context generator for this tagger.</param>
 /// <param name="dictionary">The lookup list passed on as this tagger's dictionary.</param>
 public MaximumEntropyPosTagger(
     SharpEntropy.IMaximumEntropyModel model,
     IPosContextGenerator contextGenerator,
     PosLookupList dictionary)
     : this(mDefaultBeamSize, model, contextGenerator, dictionary)
 {
 }
 /// <summary>
 /// Trains a GIS model from the specified events.
 /// </summary>
 /// <param name="eventReader">The reader supplying the training events.</param>
 /// <param name="iterations">The iteration count passed to the trainer.</param>
 /// <param name="cut">The cutoff value passed to the trainer.</param>
 /// <returns>A GisModel constructed from the trainer after training.</returns>
 public static SharpEntropy.GisModel TrainModel(SharpEntropy.ITrainingEventReader eventReader, int iterations, int cut)
 {
     var gisTrainer = new SharpEntropy.GisTrainer();
     gisTrainer.TrainModel(eventReader, iterations, cut);

     var trainedModel = new SharpEntropy.GisModel(gisTrainer);
     return trainedModel;
 }
 /// <summary>
 /// Initializes a POS tagger from a beam size, model, context generator and dictionary,
 /// and creates the PosBeamSearch used by this tagger.
 /// </summary>
 /// <param name="beamSize">The beam size stored by this tagger and used for its beam search.</param>
 /// <param name="model">The maximum entropy model for this tagger.</param>
 /// <param name="contextGenerator">The context generator for this tagger.</param>
 /// <param name="dictionary">The lookup list stored as this tagger's dictionary.</param>
 public MaximumEntropyPosTagger(
     int beamSize,
     SharpEntropy.IMaximumEntropyModel model,
     IPosContextGenerator contextGenerator,
     PosLookupList dictionary)
 {
     mContextGenerator = contextGenerator;
     mPosModel = model;
     mBeamSize = beamSize;

     // The beam search is handed this tagger along with the decoding parameters.
     var beamSearch = new PosBeamSearch(this, mBeamSize, contextGenerator, model);
     Beam = beamSearch;

     mDictionary = dictionary;
 }
Example #27
0
		/// <summary>
		/// Initializes an event reader over the specified data reader, using a
		/// DefaultChunkerContextGenerator to create events.
		/// </summary>
		/// <param name="dataReader">
		/// The data reader for this event reader.
		/// </param>
		public ChunkerEventReader(SharpEntropy.ITrainingDataReader<string> dataReader)
			: this(dataReader, new DefaultChunkerContextGenerator()) { }
        /// <summary>
        /// Tags each line read from the supplied reader's underlying stream and compares the
        /// predicted tags with the annotated outcomes, reporting tag-level and line-level accuracy.
        /// </summary>
        /// <param name="posModel">The model stored as this tagger's POS model before evaluating.</param>
        /// <param name="reader">
        /// Reader whose <c>BaseStream</c> supplies the annotated lines. The stream is read through
        /// a new UTF-7 <see cref="System.IO.StreamReader"/> rather than through this reader itself.
        /// </param>
        /// <param name="accuracy">
        /// Fraction of evaluated tags that equal the annotated outcome; NaN when no tags were evaluated.
        /// </param>
        /// <param name="sentenceAccuracy">
        /// Fraction of lines in which every tag equals the annotated outcome; NaN when no lines were read.
        /// </param>
        public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
        {
            mPosModel = posModel;

            // Integer counters: float counters stop incrementing at 2^24 and would force the
            // ratios below to be computed in single precision.
            long total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;

            // Not disposed on purpose: disposing this reader would also close the caller's stream.
            System.IO.StreamReader sentenceReader = new System.IO.StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
            string line;

            while ((line = sentenceReader.ReadLine()) != null)
            {
                sentences++;
                Util.Pair<ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
                ArrayList words = annotatedPair.FirstValue;
                ArrayList outcomes = annotatedPair.SecondValue;
                ArrayList tags = new ArrayList(Beam.BestSequence(words, null).Outcomes);

                bool isSentenceOK = true;
                for (int index = 0; index < tags.Count; index++)
                {
                    total++;
                    string tag = (string) tags[index];
                    if (tag == (string) outcomes[index])
                    {
                        correct++;
                    }
                    else
                    {
                        isSentenceOK = false;
                    }
                }

                if (isSentenceOK)
                {
                    sentencesCorrect++;
                }
            }

            // Floating-point division, so an empty input yields NaN instead of throwing.
            accuracy = (double) correct / total;
            sentenceAccuracy = (double) sentencesCorrect / sentences;
        }
Example #29
0
 /// <summary>
 /// Initializes a search object, passing 0 as the cache size to the four-argument constructor.
 /// </summary>
 /// <param name="size">
 /// The size of the beam (k).
 /// </param>
 /// <param name="contextGenerator">
 /// The context generator for the model.
 /// </param>
 /// <param name="model">
 /// The model for assigning probabilities to the sequence outcomes.
 /// </param>
 public BeamSearch(
     int size,
     IBeamSearchContextGenerator contextGenerator,
     SharpEntropy.IMaximumEntropyModel model)
     : this(size, contextGenerator, model, 0)
 {
 }
 /// <summary>
 /// Initializes a beam search tied to the specified POS tagger.
 /// </summary>
 /// <param name="posTagger">The tagger stored by this beam search.</param>
 /// <param name="size">The beam size passed to the base constructor.</param>
 /// <param name="contextGenerator">The context generator passed to the base constructor.</param>
 /// <param name="model">The model passed to the base constructor.</param>
 /// <param name="cacheSize">The cache size passed to the base constructor.</param>
 public PosBeamSearch(
     MaximumEntropyPosTagger posTagger,
     int size,
     IPosContextGenerator contextGenerator,
     SharpEntropy.IMaximumEntropyModel model,
     int cacheSize)
     : base(size, contextGenerator, model, cacheSize)
 {
     mMaxentPosTagger = posTagger;
 }