/// <summary>
/// Initializes an event reader that produces events of the requested type from the
/// given data reader, using the supplied head rules.
/// </summary>
/// <param name="dataReader">
/// Source of the training data; a 1-parse-per-line Penn Treebank style parse.
/// </param>
/// <param name="rules">
/// The head rules kept by this reader.
/// </param>
/// <param name="eventType">
/// The type of events desired (tag, chunk, build, or check). It selects which
/// context generator is created.
/// </param>
public ParserEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IHeadRules rules, EventType eventType)
{
    // Only the context generator matching the requested event type is instantiated.
    switch (eventType)
    {
        case EventType.Build:
            mBuildContextGenerator = new BuildContextGenerator();
            break;
        case EventType.Check:
            mCheckContextGenerator = new CheckContextGenerator();
            break;
        case EventType.Chunk:
            mChunkContextGenerator = new ChunkContextGenerator();
            break;
        case EventType.Tag:
            mPosContextGenerator = new PosTagger.DefaultPosContextGenerator();
            break;
    }

    mHeadRules = rules;
    mEventType = eventType;
    mDataReader = dataReader;
    mEventIndex = 0;

    // With no data at all, start from an empty event array.
    if (!dataReader.HasNext())
    {
        mEvents = new SharpEntropy.TrainingEvent[0];
        return;
    }

    // Otherwise prime the reader from the data.
    AddNewEvents();
}
/// <summary>
/// Creates a new event reader based on the specified data reader using the specified context generator.
/// The reader is primed with the first line of the data, if there is one.
/// </summary>
/// <param name="dataReader">
/// The data reader for this event reader.
/// </param>
/// <param name="contextGenerator">
/// The context generator which should be used in the creation of events for this event stream.
/// </param>
public NameFinderEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, INameContextGenerator contextGenerator)
{
    mDataReader = dataReader;
    mContextGenerator = contextGenerator;
    mEventIndex = 0;
    mPreviousTags = new Dictionary<string, string>();

    // Prime events with the first line of the data stream.
    if (mDataReader.HasNext())
    {
        mLine = mDataReader.NextToken();
        if (mLine.Length == 0)
        {
            // A blank first line yields no events. Reset the tag history and keep
            // mEvents non-null, exactly as the no-data branch below does.
            mPreviousTags.Clear();
            mEvents = new SharpEntropy.TrainingEvent[0];
        }
        else
        {
            AddEvents(mLine);
        }
    }
    else
    {
        mEvents = new SharpEntropy.TrainingEvent[0];
    }
}
/// <summary>
/// Creates a new beam search with an optional contexts cache.
/// </summary>
/// <param name="size">The size of the beam (k).</param>
/// <param name="contextGenerator">The context generator for the model.</param>
/// <param name="model">
/// The model for assigning probabilities to the sequence outcomes; its outcome count
/// sizes the internal probability buffer.
/// </param>
/// <param name="cacheSize">
/// Size of the cache to use for performance. No cache is created when this is zero or negative.
/// </param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
{
    Size = size;
    ContextGenerator = contextGenerator;
    Model = model;

    // One probability slot per model outcome.
    _probabilities = new double[model.OutcomeCount];

    if (cacheSize <= 0)
    {
        return;
    }

    _contextsCache = new Cache(cacheSize);
}
/// <summary>
/// Initializes an event reader over the given data reader, creating events with the
/// given context generator.
/// </summary>
/// <param name="dataReader">
/// The data reader for this event reader.
/// </param>
/// <param name="contextGenerator">
/// The context generator which should be used in the creation of events for this event reader.
/// </param>
public ChunkerEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IChunkerContextGenerator contextGenerator)
{
    mContextGenerator = contextGenerator;
    mDataReader = dataReader;
    mEventIndex = 0;

    // With no data at all, start from an empty event array.
    if (!dataReader.HasNext())
    {
        mEvents = new SharpEntropy.TrainingEvent[0];
        return;
    }

    AddNewEvents();
}
/// <summary>
/// Creates an event reader over the given data reader. If data is available, the first
/// token is read, the following token (when present) is pre-fetched into the look-ahead
/// field, and events are generated for the first token.
/// </summary>
/// <param name="dataReader">The data reader supplying the training text.</param>
/// <param name="scanner">The end-of-sentence scanner kept by this reader.</param>
/// <param name="contextGenerator">The context generator kept by this reader.</param>
public SentenceDetectionEventReader(SharpEntropy.ITrainingDataReader<string> dataReader, IEndOfSentenceScanner scanner, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
{
    mDataReader = dataReader;
    mScanner = scanner;
    mContextGenerator = contextGenerator;

    // Nothing to prime when the reader has no data.
    if (!mDataReader.HasNext())
    {
        return;
    }

    string firstToken = mDataReader.NextToken();

    // Pre-fetch the next token before events are generated for the first one.
    if (mDataReader.HasNext())
    {
        mNext = mDataReader.NextToken();
    }

    AddNewEvents(firstToken);
}
/// <summary>
/// Creates a new beam search with an optional memory-limited contexts cache.
/// </summary>
/// <param name="size">The size of the beam (k).</param>
/// <param name="contextGenerator">The context generator for the model.</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
/// <param name="cacheSizeInMegaBytes">
/// Value passed to the cache configuration as <c>cacheMemoryLimitMegabytes</c>.
/// No cache is created when this is zero or negative.
/// </param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSizeInMegaBytes)
{
    Size = size;
    ContextGenerator = contextGenerator;
    Model = model;

    if (cacheSizeInMegaBytes <= 0)
    {
        return;
    }

    // The cache takes its configuration as name/value pairs.
    var cacheSettings = new NameValueCollection();
    cacheSettings.Add("cacheMemoryLimitMegabytes", cacheSizeInMegaBytes.ToString());

    contextsCache = new MemoryCache("beamSearchContextCache", cacheSettings);
}
/// <summary>
/// Creates a sentence detector for the given model, delegating to the three-argument
/// constructor with a <c>SentenceDetectionContextGenerator</c> built from the default
/// end-of-sentence characters and a <c>DefaultEndOfSentenceScanner</c>.
/// </summary>
/// <param name="model">
/// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
/// evaluate end-of-sentence decisions.
/// </param>
public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model)
    : this(
        model,
        new SentenceDetectionContextGenerator(DefaultEndOfSentenceScanner.GetEndOfSentenceCharacters()),
        new DefaultEndOfSentenceScanner())
{
    mUnicodeMapping = false;

    // Storage for sentence probabilities, pre-sized to 50 entries.
    mSentenceProbs = new List<double>(50);
}
/// <summary>
/// Creates a new <code>MaximumEntropySentenceDetector</code> instance.
/// </summary>
/// <param name="model">
/// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
/// evaluate end-of-sentence decisions.
/// </param>
/// <param name="contextGenerator">
/// The IContextGenerator object which this MaximumEntropySentenceDetector
/// will use to turn strings into contexts for the model to evaluate.
/// </param>
/// <param name="scanner">
/// The IEndOfSentenceScanner which this MaximumEntropySentenceDetector
/// will use to locate end of sentence indexes.
/// </param>
public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
{
    mModel = model;
    mContextGenerator = contextGenerator;
    mScanner = scanner;

    // Every constructor chain ends here, so the probability list is created here.
    // Otherwise only the single-argument constructor would set it up, and instances
    // built through the other overloads would have no list.
    mSentenceProbs = new List<double>(50);
}
/// <summary>
/// Initializes a chunker for the given model and context generator, decoding with a
/// beam search of the requested size.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
/// <param name="contextGenerator">The context generator to be used by the specified model.</param>
/// <param name="beamSize">The size of the beam that should be used when decoding sequences.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator, int beamSize)
{
    Model = model;

    // The beam search receives a reference back to this chunker.
    Beam = new ChunkBeamSearch(this, beamSize, contextGenerator, model);
}
// Constructors ---------------

/// <summary>
/// Initializes a chunker for the given model, using a new
/// <c>DefaultChunkerContextGenerator</c> and a beam size of 10.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model)
    : this(model, new DefaultChunkerContextGenerator(), 10)
{
}
/// <summary>
/// Trains a chunker model from the given events.
/// The training file should contain one word per line, each line being a
/// space-delimited triple of "word pos outcome"; blank lines mark sentence breaks.
/// </summary>
/// <param name="eventReader">The chunker event reader.</param>
/// <param name="iterations">The number of iterations to perform.</param>
/// <param name="cutoff">
/// The number of times a predicate must be seen in order to be relevant for training.
/// </param>
/// <returns>The trained model.</returns>
public static SharpEntropy.GisModel Train(SharpEntropy.ITrainingEventReader eventReader, int iterations, int cutoff)
{
    SharpEntropy.GisTrainer gisTrainer = new SharpEntropy.GisTrainer();

    // Index the events with the cutoff before handing them to the trainer.
    SharpEntropy.TwoPassDataIndexer indexer = new SharpEntropy.TwoPassDataIndexer(eventReader, cutoff);
    gisTrainer.TrainModel(iterations, indexer);

    return new SharpEntropy.GisModel(gisTrainer);
}
/// <summary>
/// Initializes an event reader over the given data reader, using a new
/// <c>DefaultNameContextGenerator</c>.
/// </summary>
/// <param name="dataReader">
/// The data stream for this event reader.
/// </param>
public NameFinderEventReader(SharpEntropy.ITrainingDataReader<string> dataReader)
    : this(dataReader, new DefaultNameContextGenerator())
{
}
/// <summary>
/// Creates a tokenizer whose model is a <c>GisModel</c> built from the given model reader.
/// </summary>
/// <param name="modelReader">The reader from which the model is constructed.</param>
public MaxentTokenizer(SharpEntropy.IO.IGisModelReader modelReader)
{
    GisModel loadedModel = new GisModel(modelReader);
    mModel = loadedModel;
}
/// <summary>
/// Initializes a name finder with the given model and context generator, delegating to
/// the three-argument constructor with 10 as the third argument
/// (presumably the beam size — confirm against that constructor).
/// </summary>
/// <param name="model">
/// The model to be used to find names.
/// </param>
/// <param name="contextGenerator">
/// The context generator to be used with this name finder.
/// </param>
public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator)
    : this(model, contextGenerator, 10)
{
}
/// <summary>
/// Initializes a name finder with the given model, delegating to the three-argument
/// constructor with a new <c>DefaultNameContextGenerator(10)</c> and 10 as the third
/// argument (presumably the beam size — confirm against that constructor).
/// </summary>
/// <param name="model">
/// The model to be used to find names.
/// </param>
public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model)
    : this(model, new DefaultNameContextGenerator(10), 10)
{
}
/// <summary>
/// Creates a part-of-speech tagger for the given model, using a new
/// <c>DefaultPosContextGenerator</c>.
/// </summary>
/// <param name="model">The maximum entropy model used by this tagger.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model)
    : this(model, new DefaultPosContextGenerator())
{
}
// Utilities ---------------

/// <summary>
/// Trains a chunker model from the given events, using 100 iterations and a cutoff of 5.
/// The training file should contain one word per line, each line being a
/// space-delimited triple of "word pos outcome"; blank lines mark sentence breaks.
/// </summary>
/// <param name="eventReader">The chunker event reader.</param>
/// <returns>The trained model.</returns>
public static SharpEntropy.GisModel Train(SharpEntropy.ITrainingEventReader eventReader)
{
    const int defaultIterations = 100;
    const int defaultCutoff = 5;

    return Train(eventReader, defaultIterations, defaultCutoff);
}
/// <summary>
/// Trains a model from the given events and persists it to a file using a
/// <c>BinaryGisModelWriter</c>.
/// </summary>
/// <param name="eventReader">The reader supplying the training events.</param>
/// <param name="outputFilename">The file name the trained model is persisted to.</param>
public static void Train(SharpEntropy.ITrainingEventReader eventReader, string outputFilename)
{
    // NOTE(review): 0.1 is passed to the GisTrainer constructor; presumably a smoothing value — confirm.
    SharpEntropy.GisTrainer gisTrainer = new SharpEntropy.GisTrainer(0.1);

    // NOTE(review): 5 is presumably the predicate cutoff and 100 the iteration count — confirm.
    SharpEntropy.TwoPassDataIndexer indexer = new SharpEntropy.TwoPassDataIndexer(eventReader, 5);
    gisTrainer.TrainModel(100, indexer);

    SharpEntropy.GisModel trainedModel = new SharpEntropy.GisModel(gisTrainer);
    SharpEntropy.IO.BinaryGisModelWriter modelWriter = new SharpEntropy.IO.BinaryGisModelWriter();
    modelWriter.Persist(trainedModel, outputFilename);
}
/// <summary>
/// Creates a beam search tied to the given chunker. The size, context generator and
/// model are forwarded to the base constructor.
/// </summary>
/// <param name="maxentChunker">The chunker this beam search keeps a reference to.</param>
/// <param name="size">The size of the beam.</param>
/// <param name="contextGenerator">The context generator passed to the base beam search.</param>
/// <param name="model">The model passed to the base beam search.</param>
public ChunkBeamSearch(MaximumEntropyChunker maxentChunker, int size, IChunkerContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model)
    : base(size, contextGenerator, model)
{
    _maxentChunker = maxentChunker;
}
/// <summary>
/// Creates a tokenizer that uses the given maximum entropy model. The tokenizer starts
/// with a new <c>TokenContextGenerator</c>, alpha-numeric optimization switched off,
/// and empty token and probability lists.
/// </summary>
/// <param name="model">The maximum entropy model used by this tokenizer.</param>
public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
{
    mModel = model;
    mContextGenerator = new TokenContextGenerator();
    mAlphaNumericOptimization = false;

    // Working lists; the probability list is pre-sized to 50 entries.
    mTokenProbabilities = new List<double>(50);
    mNewTokens = new List<Util.Span>();
}
/// <summary>
/// Initializes a chunker for the given model and context generator, with a beam size of 10.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
/// <param name="contextGenerator">The context generator to be used by the specified model.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator)
    : this(model, contextGenerator, 10)
{
}
/// <summary>
/// Creates a part-of-speech tagger for the given model and context generator, using the
/// default beam size and no dictionary (null is passed on).
/// </summary>
/// <param name="model">The maximum entropy model used by this tagger.</param>
/// <param name="contextGenerator">The context generator used by this tagger.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator)
    : this(mDefaultBeamSize, model, contextGenerator, null)
{
}
/// <summary>
/// Creates a sentence detector for the given model and context generator, delegating to
/// the three-argument constructor with a new <c>DefaultEndOfSentenceScanner</c>.
/// </summary>
/// <param name="model">
/// The IMaximumEntropyModel which this MaximumEntropySentenceDetector will use to
/// evaluate end-of-sentence decisions.
/// </param>
/// <param name="contextGenerator">
/// The IContextGenerator object which this MaximumEntropySentenceDetector
/// will use to turn strings into contexts for the model to evaluate.
/// </param>
public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator)
    : this(model, contextGenerator, new DefaultEndOfSentenceScanner())
{
}
/// <summary>
/// Creates a part-of-speech tagger for the given model, context generator and dictionary,
/// using the default beam size.
/// </summary>
/// <param name="model">The maximum entropy model used by this tagger.</param>
/// <param name="contextGenerator">The context generator used by this tagger.</param>
/// <param name="dictionary">The lookup list passed on as this tagger's dictionary.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
    : this(mDefaultBeamSize, model, contextGenerator, dictionary)
{
}
/// <summary>
/// Trains a model from the given events with a new <c>GisTrainer</c>.
/// </summary>
/// <param name="eventReader">The reader supplying the training events.</param>
/// <param name="iterations">Passed to the trainer as the iteration argument.</param>
/// <param name="cut">Passed to the trainer as the cutoff argument.</param>
/// <returns>A <c>GisModel</c> built from the trainer after training.</returns>
public static SharpEntropy.GisModel TrainModel(SharpEntropy.ITrainingEventReader eventReader, int iterations, int cut)
{
    SharpEntropy.GisTrainer gisTrainer = new SharpEntropy.GisTrainer();
    gisTrainer.TrainModel(eventReader, iterations, cut);

    SharpEntropy.GisModel trainedModel = new SharpEntropy.GisModel(gisTrainer);
    return trainedModel;
}
/// <summary>
/// Creates a part-of-speech tagger with an explicit beam size, model, context generator
/// and dictionary, and builds the beam search used for decoding.
/// </summary>
/// <param name="beamSize">The size of the beam used by the beam search.</param>
/// <param name="model">The maximum entropy model used by this tagger.</param>
/// <param name="contextGenerator">The context generator used by this tagger.</param>
/// <param name="dictionary">The lookup list stored as this tagger's dictionary; may be null.</param>
public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
{
    mBeamSize = beamSize;
    mPosModel = model;
    mContextGenerator = contextGenerator;
    mDictionary = dictionary;

    // The beam search receives a reference back to this tagger.
    Beam = new PosBeamSearch(this, mBeamSize, contextGenerator, model);
}
/// <summary>
/// Initializes an event reader over the given data reader, using a new
/// <c>DefaultChunkerContextGenerator</c>.
/// </summary>
/// <param name="dataReader">
/// The data reader for this event reader.
/// </param>
public ChunkerEventReader(SharpEntropy.ITrainingDataReader<string> dataReader)
    : this(dataReader, new DefaultChunkerContextGenerator())
{
}
/// <summary>
/// Evaluates tagging accuracy against annotated sentences, reporting both per-tag and
/// per-sentence accuracy.
/// </summary>
/// <param name="posModel">The model to evaluate; it is stored as this tagger's model.</param>
/// <param name="reader">
/// Reader supplying the annotated data; each line is treated as one sentence.
/// It is read to the end using its own encoding and is not closed.
/// </param>
/// <param name="accuracy">
/// Receives the fraction of predicted tags that equal the annotated outcome
/// (NaN when no tags were evaluated).
/// </param>
/// <param name="sentenceAccuracy">
/// Receives the fraction of sentences in which every tag matched
/// (NaN when no lines were read).
/// </param>
public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
{
    // NOTE(review): decoding below goes through Beam, which is not rebuilt here;
    // confirm that Beam actually picks up the model assigned on this line.
    mPosModel = posModel;

    // Integer counters: single-precision counters stop incrementing once they reach 2^24.
    long total = 0;
    long correct = 0;
    long sentences = 0;
    long sentencesCorrect = 0;

    // Read through the caller's reader directly so that its configured encoding and
    // current position are honoured. Re-wrapping the base stream as UTF-7 (an obsolete
    // encoding) would mis-decode '+' sequences and any non-ASCII text.
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        sentences++;

        Util.Pair<ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
        ArrayList words = annotatedPair.FirstValue;
        ArrayList outcomes = annotatedPair.SecondValue;
        ArrayList tags = new ArrayList(Beam.BestSequence(words, null).Outcomes);

        bool isSentenceOK = true;
        for (int index = 0; index < tags.Count; index++)
        {
            total++;
            if ((string) tags[index] == (string) outcomes[index])
            {
                correct++;
            }
            else
            {
                isSentenceOK = false;
            }
        }

        if (isSentenceOK)
        {
            sentencesCorrect++;
        }
    }

    // Floating-point division: an empty input yields NaN rather than throwing.
    accuracy = (double) correct / total;
    sentenceAccuracy = (double) sentencesCorrect / sentences;
}
/// <summary>
/// Creates a new beam search, delegating to the four-argument constructor with a
/// cache size of 0.
/// </summary>
/// <param name="size">
/// The size of the beam (k).
/// </param>
/// <param name="contextGenerator">
/// The context generator for the model.
/// </param>
/// <param name="model">
/// The model for assigning probabilities to the sequence outcomes.
/// </param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model)
    : this(size, contextGenerator, model, 0)
{
}
/// <summary>
/// Creates a beam search tied to the given part-of-speech tagger. The size, context
/// generator, model and cache size are forwarded to the base constructor.
/// </summary>
/// <param name="posTagger">The tagger this beam search keeps a reference to.</param>
/// <param name="size">The size of the beam.</param>
/// <param name="contextGenerator">The context generator passed to the base beam search.</param>
/// <param name="model">The model passed to the base beam search.</param>
/// <param name="cacheSize">The cache size passed to the base beam search.</param>
public PosBeamSearch(MaximumEntropyPosTagger posTagger, int size, IPosContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
    : base(size, contextGenerator, model, cacheSize)
{
    mMaxentPosTagger = posTagger;
}