/// <summary>
/// Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
/// </summary>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="tagger">The model to assign pos-tags.</param>
/// <param name="chunker">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules for head word percolation.</param>
/// <param name="beamSize">The number of different parses kept during parsing.</param>
/// <param name="advancePercentage">
/// The minimal amount of probability mass which advanced outcomes must represent.
/// Only outcomes which contribute to the top "advancePercentage" will be explored.
/// </param>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    posTagger = tagger;
    basalChunker = chunker;
    this.buildModel = buildModel;
    this.checkModel = checkModel;
    // The single beam size bounds both m and k; q is the advance
    // probability-mass threshold described above.
    m = beamSize;
    k = beamSize;
    q = advancePercentage;
    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();
    this.headRules = headRules;
    startTypeMap = new Dictionary<string, string>();
    continueTypeMap = new Dictionary<string, string>();
    // Partition the build model's outcomes into "start" and "continue"
    // constituent types, mapping each outcome name to its bare label.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix))
        {
            //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
            startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix))
        {
            //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
            continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }
    // Cache frequently used outcome indexes.
    topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
    completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
/// <summary>
/// Builds a POS tagger that decodes the given maximum entropy model with a beam search.
/// </summary>
/// <param name="beamSize">Width of the beam used when searching for the best tag sequence.</param>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
/// <param name="contextGenerator">Produces the model's contextual predicates for each token.</param>
/// <param name="dictionary">Lookup list consulted when tagging known words.</param>
public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
{
    mDictionary = dictionary;
    mContextGenerator = contextGenerator;
    mPosModel = model;
    mBeamSize = beamSize;
    Beam = new PosBeamSearch(this, mBeamSize, contextGenerator, model);
}
/// <summary>
/// Builds a maximum entropy tokenizer around the supplied model, with
/// alphanumeric optimization switched off by default.
/// </summary>
/// <param name="model">The maximum entropy model used for token boundary decisions.</param>
public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
{
    mModel = model;
    mContextGenerator = new TokenContextGenerator();
    mNewTokens = new List<Util.Span>();
    // Probability list starts with an initial capacity of 50 entries.
    mTokenProbabilities = new List<double>(50);
    mAlphaNumericOptimization = false;
}
/// <summary>
/// Builds a POS tagger that decodes the given maximum entropy model with a beam search.
/// </summary>
/// <param name="beamSize">Width of the beam used when searching for the best tag sequence.</param>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
/// <param name="contextGenerator">Produces the model's contextual predicates for each token.</param>
/// <param name="dictionary">Tag dictionary consulted when tagging known words.</param>
public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
{
    // Closed-class tag filtering is disabled by default.
    UseClosedClassTagsFilter = false;
    this.BeamSize = beamSize;
    this.PosModel = model;
    this.ContextGenerator = contextGenerator;
    Beam = new PosBeamSearch(this, this.BeamSize, contextGenerator, model);
    this.TagDictionary = dictionary;
}
/// <summary>
/// Builds a beam search decoder over the supplied model.
/// </summary>
/// <param name="size">Beam width (k): how many candidate sequences are kept at each step.</param>
/// <param name="contextGenerator">Generates the model's contextual predicates.</param>
/// <param name="model">Model that assigns probabilities to sequence outcomes.</param>
/// <param name="cacheSize">Capacity of the context cache; a non-positive value disables caching.</param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
{
    Model = model;
    ContextGenerator = contextGenerator;
    Size = size;
    // One probability slot per model outcome, reused across decoding steps.
    mProbabilities = new double[model.OutcomeCount];
    if (cacheSize > 0)
    {
        mContextsCache = new Cache(cacheSize);
    }
}
/// <summary>Creates a new beam search decoder.</summary>
/// <param name="size">The size of the beam (k): candidate sequences kept per step.</param>
/// <param name="contextGenerator">The context generator for the model.</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
/// <param name="cacheSize">Capacity of the context cache; a non-positive value disables caching.</param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize)
{
    Size = size;
    ContextGenerator = contextGenerator;
    Model = model;
    // One probability slot per model outcome, reused across decoding steps.
    _probabilities = new double[model.OutcomeCount];
    if (cacheSize > 0)
    {
        _contextsCache = new Cache(cacheSize);
    }
}
/// <summary>
/// Creates a similarity model with the given name, either collecting training
/// events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="modelName">Base name (path without extension) of the model file.</param>
/// <param name="train">True to collect training events; false to load an existing model.</param>
private SimilarityModel(string modelName, bool train)
{
    ModelName = modelName;
    if (train)
    {
        _events = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        _testModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + ModelExtension));
        // Cache the index of the "same" outcome for fast probability lookups.
        _sameIndex = _testModel.GetOutcomeIndex(Same);
    }
}
/// <summary>
/// Creates a similarity model with the given name, either collecting training
/// events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="modelName">Base name (path without extension) of the model file.</param>
/// <param name="train">True to collect training events; false to load an existing model.</param>
private SimilarityModel(string modelName, bool train)
{
    mModelName = modelName;
    if (train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
        // Cache the index of the "same" outcome for fast probability lookups.
        mSameIndex = mTestModel.GetOutcomeIndex(mSame);
    }
}
/// <summary>
/// Creates a grammatical-number model with the given name, either collecting
/// training events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="modelName">Base name (path without extension) of the model file.</param>
/// <param name="train">True to collect training events; false to load an existing model.</param>
private NumberModel(string modelName, bool train)
{
    mModelName = modelName;
    if (train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
        // Cache outcome indexes for the singular and plural outcomes.
        mSingularIndex = mTestModel.GetOutcomeIndex(NumberEnum.Singular.ToString());
        mPluralIndex = mTestModel.GetOutcomeIndex(NumberEnum.Plural.ToString());
    }
}
/// <summary>
/// Creates a grammatical-number model with the given name, either collecting
/// training events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="modelName">Base name (path without extension) of the model file.</param>
/// <param name="train">True to collect training events; false to load an existing model.</param>
private NumberModel(string modelName, bool train)
{
    mModelName = modelName;
    if (train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
        // Cache outcome indexes for the singular and plural outcomes.
        mSingularIndex = mTestModel.GetOutcomeIndex(NumberEnum.Singular.ToString());
        mPluralIndex = mTestModel.GetOutcomeIndex(NumberEnum.Plural.ToString());
    }
}
/// <summary>Creates a new beam search decoder.</summary>
/// <param name="size">The size of the beam (k): candidate sequences kept per step.</param>
/// <param name="contextGenerator">The context generator for the model.</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
/// <param name="cacheSizeInMegaBytes">Memory limit of the context cache in megabytes; a non-positive value disables caching.</param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSizeInMegaBytes)
{
    Size = size;
    ContextGenerator = contextGenerator;
    Model = model;
    if (cacheSizeInMegaBytes > 0)
    {
        // Configure a MemoryCache bounded by the requested megabyte limit.
        var properties = new NameValueCollection
        {
            { "cacheMemoryLimitMegabytes", cacheSizeInMegaBytes.ToString() }
        };
        contextsCache = new MemoryCache("beamSearchContextCache", properties);
    }
}
/// <summary>Creates a new beam search decoder.</summary>
/// <param name="size">The size of the beam (k): candidate sequences kept per step.</param>
/// <param name="contextGenerator">The context generator for the model.</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
/// <param name="cacheSizeInMegaBytes">Memory limit of the context cache in megabytes; a non-positive value disables caching.</param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSizeInMegaBytes)
{
    Size = size;
    ContextGenerator = contextGenerator;
    Model = model;
    if (cacheSizeInMegaBytes > 0)
    {
        // Configure a MemoryCache bounded by the requested megabyte limit.
        var properties = new NameValueCollection
        {
            {"cacheMemoryLimitMegabytes", cacheSizeInMegaBytes.ToString()}
        };
        contextsCache = new MemoryCache("beamSearchContextCache", properties);
    }
}
/// <summary>
/// Creates a gender model with the given name, either collecting training
/// events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="modelName">Base name (path without extension) of the model file; ".mal" and ".fem" name lists are loaded from the same base path.</param>
/// <param name="train">True to collect training events; false to load an existing model.</param>
private GenderModel(string modelName, bool train)
{
    mModelName = modelName;
    // Known male/female first-name lists live alongside the model file.
    mMaleNames = ReadNames(modelName + ".mal");
    mFemaleNames = ReadNames(modelName + ".fem");
    if (train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
        // Cache outcome indexes for the three gender outcomes.
        mMaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Male.ToString());
        mFemaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Female.ToString());
        mNeuterIndex = mTestModel.GetOutcomeIndex(GenderEnum.Neuter.ToString());
    }
}
/// <summary>
/// Creates a gender model with the given name, either collecting training
/// events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="modelName">Base name (path without extension) of the model file; ".mal" and ".fem" name lists are loaded from the same base path.</param>
/// <param name="train">True to collect training events; false to load an existing model.</param>
private GenderModel(string modelName, bool train)
{
    mModelName = modelName;
    // Known male/female first-name lists live alongside the model file.
    mMaleNames = ReadNames(modelName + ".mal");
    mFemaleNames = ReadNames(modelName + ".fem");
    if (train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
        // Cache outcome indexes for the three gender outcomes.
        mMaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Male.ToString());
        mFemaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Female.ToString());
        mNeuterIndex = mTestModel.GetOutcomeIndex(GenderEnum.Neuter.ToString());
    }
}
/// <summary>
/// Creates a non-referential resolver for the named model, either collecting
/// training events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="projectName">Directory containing the resolver models.</param>
/// <param name="name">Base name of this resolver's model file (suffixed with "_nr").</param>
/// <param name="mode">Whether the resolver is being trained or tested.</param>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="mode"/> is neither Train nor Test.</exception>
public DefaultNonReferentialResolver(string projectName, string name, ResolverMode mode)
{
    mResolverMode = mode;
    // Use the platform's directory separator instead of a hard-coded backslash
    // so the model path also resolves on non-Windows systems (matches the
    // sibling resolver implementation).
    mModelName = projectName + System.IO.Path.DirectorySeparatorChar + name + "_nr";
    if (mode == ResolverMode.Train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else if (mode == ResolverMode.Test)
    {
        mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
        mNonReferentialIndex = mModel.GetOutcomeIndex(MaximumEntropyResolver.Same);
    }
    else
    {
        throw new ArgumentException("unexpected mode " + mode);
    }
}
/// <summary>
/// Creates a non-referential resolver for the named model, either collecting
/// training events or loading a persisted GIS model for testing.
/// </summary>
/// <param name="projectName">Directory containing the resolver models.</param>
/// <param name="name">Base name of this resolver's model file (suffixed with "_nr").</param>
/// <param name="mode">Whether the resolver is being trained or tested.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="mode"/> is neither Train nor Test.</exception>
public DefaultNonReferentialResolver(string projectName, string name, ResolverMode mode)
{
    mResolverMode = mode;
    // Platform-independent path to the "_nr" model file.
    mModelName = projectName + Path.DirectorySeparatorChar + name + "_nr";
    if (mode == ResolverMode.Train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else if (mode == ResolverMode.Test)
    {
        mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
        mNonReferentialIndex = mModel.GetOutcomeIndex(MaximumEntropyResolver.Same);
    }
    else
    {
        throw new ArgumentException("unexpected mode " + mode);
    }
}
/// <summary>
/// Evaluates the tagger against a stream of annotated sentences, one sentence
/// per line, reporting per-tag accuracy and whole-sentence accuracy.
/// </summary>
/// <param name="posModel">The POS model to evaluate; replaces the tagger's current model.</param>
/// <param name="reader">Reader over the annotated evaluation data.</param>
/// <param name="accuracy">Fraction of individual tags predicted correctly.</param>
/// <param name="sentenceAccuracy">Fraction of sentences tagged entirely correctly.</param>
public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, StreamReader reader, out double accuracy, out double sentenceAccuracy)
{
    this.PosModel = posModel;
    float total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;
    // NOTE(review): UTF-7 is obsolete and insecure in modern .NET (SYSLIB0001);
    // confirm the evaluation data really is UTF-7 before changing the encoding.
    var sentenceReader = new StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
    string line;
    while ((object)(line = sentenceReader.ReadLine()) != null)
    {
        sentences++;
        // Each line holds the words and their gold-standard tags.
        var annotatedPair = PosEventReader.ConvertAnnotatedString(line);
        var words = annotatedPair.Item1;
        var outcomes = annotatedPair.Item2;
        // Decode the best tag sequence for the sentence with the beam search.
        var tags = new ArrayList(Beam.BestSequence(words.ToArray(), null).Outcomes);
        int count = 0;
        bool isSentenceOk = true;
        for (IEnumerator tagIndex = tags.GetEnumerator(); tagIndex.MoveNext(); count++)
        {
            total++;
            var tag = (string)tagIndex.Current;
            if (tag == (string)outcomes[count])
            {
                correct++;
            }
            else
            {
                isSentenceOk = false;
            }
        }
        if (isSentenceOk)
        {
            sentencesCorrect++;
        }
    }
    // NOTE(review): if the input contains no sentences these divisions yield NaN.
    accuracy = correct / total;
    sentenceAccuracy = sentencesCorrect / sentences;
}
/// <summary>
/// Evaluates the tagger against a stream of annotated sentences, one sentence
/// per line, reporting per-tag accuracy and whole-sentence accuracy.
/// </summary>
/// <param name="posModel">The POS model to evaluate; replaces the tagger's current model.</param>
/// <param name="reader">Reader over the annotated evaluation data.</param>
/// <param name="accuracy">Fraction of individual tags predicted correctly.</param>
/// <param name="sentenceAccuracy">Fraction of sentences tagged entirely correctly.</param>
public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
{
    mPosModel = posModel;
    float total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;
    // NOTE(review): UTF-7 is obsolete and insecure in modern .NET (SYSLIB0001);
    // confirm the evaluation data really is UTF-7 before changing the encoding.
    System.IO.StreamReader sentenceReader = new System.IO.StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
    string line;
    while ((object)(line = sentenceReader.ReadLine()) != null)
    {
        sentences++;
        // Each line holds the words and their gold-standard tags.
        Util.Pair<ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
        ArrayList words = annotatedPair.FirstValue;
        ArrayList outcomes = annotatedPair.SecondValue;
        // Decode the best tag sequence for the sentence with the beam search.
        ArrayList tags = new ArrayList(Beam.BestSequence(words, null).Outcomes);
        int count = 0;
        bool isSentenceOK = true;
        for (System.Collections.IEnumerator tagIndex = tags.GetEnumerator(); tagIndex.MoveNext(); count++)
        {
            total++;
            string tag = (string)tagIndex.Current;
            if (tag == (string)outcomes[count])
            {
                correct++;
            }
            else
            {
                isSentenceOK = false;
            }
        }
        if (isSentenceOK)
        {
            sentencesCorrect++;
        }
    }
    // NOTE(review): if the input contains no sentences these divisions yield NaN.
    accuracy = correct / total;
    sentenceAccuracy = sentencesCorrect / sentences;
}
/// <summary>
/// Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
/// </summary>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="tagger">The model to assign pos-tags.</param>
/// <param name="chunker">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules for head word percolation.</param>
/// <param name="beamSize">The number of different parses kept during parsing.</param>
/// <param name="advancePercentage">
/// The minimal amount of probability mass which advanced outcomes must represent.
/// Only outcomes which contribute to the top "advancePercentage" will be explored.
/// </param>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    mPosTagger = tagger;
    mBasalChunker = chunker;
    mBuildModel = buildModel;
    mCheckModel = checkModel;
    // The single beam size bounds both M and K; Q is the advance
    // probability-mass threshold described above.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;
    // Reusable probability buffers, one slot per model outcome.
    mBuildProbabilities = new double[mBuildModel.OutcomeCount];
    mCheckProbabilities = new double[mCheckModel.OutcomeCount];
    mBuildContextGenerator = new BuildContextGenerator();
    mCheckContextGenerator = new CheckContextGenerator();
    mHeadRules = headRules;
    // Heaps reused across parses to hold in-progress and completed derivations.
    mOldDerivationsHeap = new Util.TreeSet<Parse>();
    mNewDerivationsHeap = new Util.TreeSet<Parse>();
    mParses = new Util.TreeSet<Parse>();
    mStartTypeMap = new Dictionary<string, string>();
    mContinueTypeMap = new Dictionary<string, string>();
    // Partition the build model's outcomes into "start" and "continue"
    // constituent types, mapping each outcome name to its bare label.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix))
        {
            //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
            mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix))
        {
            //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
            mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }
    // Cache frequently used outcome indexes.
    mTopStartIndex = buildModel.GetOutcomeIndex(MTopStart);
    mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
/// <summary>
/// Creates a maximum-entropy-based resolver with the specified model name, using the
/// specified mode, which will look the specified number of entities back for a referent
/// and prefer the first referent if specified.
/// </summary>
/// <param name="modelDirectory">The name of the directory where the resolver models are stored.</param>
/// <param name="name">The name of the file where this model will be read or written.</param>
/// <param name="mode">The mode this resolver is being used in (training, testing).</param>
/// <param name="numberOfEntitiesBack">The number of entities back in the text that this resolver will look for a referent.</param>
/// <param name="preferFirstReferent">Set to true if the resolver should prefer the first referent which is more likely than non-reference. This only affects testing.</param>
/// <param name="nonReferentialResolver">Determines how likely it is that this entity is non-referential.</param>
/// <exception cref="System.ArgumentException">Thrown when <paramref name="mode"/> is neither Train nor Test.</exception>
protected MaximumEntropyResolver(string modelDirectory, string name, ResolverMode mode, int numberOfEntitiesBack, bool preferFirstReferent, INonReferentialResolver nonReferentialResolver) : base(numberOfEntitiesBack)
{
    mPreferFirstReferent = preferFirstReferent;
    mNonReferentialResolver = nonReferentialResolver;
    mResolverMode = mode;
    mModelName = modelDirectory + "/" + name;
    if (mResolverMode == ResolverMode.Test)
    {
        mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
        mSameIndex = mModel.GetOutcomeIndex(Same);
    }
    else if (mResolverMode == ResolverMode.Train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        // Previously this only wrote to stderr and left the resolver
        // half-initialized (later NullReferenceException); fail fast instead,
        // matching the sibling resolver constructors.
        throw new System.ArgumentException("Unknown mode: " + mResolverMode);
    }
    // Add one slot for the non-referent possibility.
    mCandidateProbabilities = new double[GetNumberEntitiesBack() + 1];
}
/// <summary>
/// Creates a sentence detector from a model, a context generator, and an
/// end-of-sentence scanner.
/// </summary>
/// <param name="model">The model used to evaluate end-of-sentence decisions.</param>
/// <param name="contextGenerator">Turns candidate positions into contexts for the model to evaluate.</param>
/// <param name="scanner">Locates candidate end-of-sentence indexes in the text.</param>
public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator, IEndOfSentenceScanner scanner)
{
    mScanner = scanner;
    mContextGenerator = contextGenerator;
    mModel = model;
}
/// <summary>
/// Creates a POS tagger with the default beam size and the default context generator.
/// </summary>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
/// <param name="dictionary">Lookup list consulted when tagging known words.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, PosLookupList dictionary) : this(mDefaultBeamSize, model, new DefaultPosContextGenerator(), dictionary)
{
}
/// <summary>
/// Creates a beam search bound to its owning POS tagger.
/// </summary>
/// <param name="posTagger">The tagger this search belongs to.</param>
/// <param name="size">Beam width.</param>
/// <param name="contextGenerator">Produces contexts for the model.</param>
/// <param name="model">Model assigning probabilities to tag sequences.</param>
/// <param name="cacheSize">Context cache capacity passed to the base search.</param>
public PosBeamSearch(MaximumEntropyPosTagger posTagger, int size, IPosContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int cacheSize) : base(size, contextGenerator, model, cacheSize)
{
    _maxentPosTagger = posTagger;
}
/// <summary>
/// Creates a maximum entropy tokenizer around the supplied model, with
/// alphanumeric optimization switched off by default.
/// </summary>
/// <param name="model">The maximum entropy model used for token boundary decisions.</param>
public MaximumEntropyTokenizer(SharpEntropy.IMaximumEntropyModel model)
{
    mContextGenerator = new TokenContextGenerator();
    mAlphaNumericOptimization = false;
    mModel = model;
    mNewTokens = new List<Util.Span>();
    // Probability list starts with an initial capacity of 50 entries.
    mTokenProbabilities = new List<double>(50);
}
/// <summary>
/// Creates a POS tagger with the default beam size and no tag dictionary.
/// </summary>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
/// <param name="contextGenerator">Produces the model's contextual predicates for each token.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator) : this(DefaultBeamSize, model, contextGenerator, null)
{
}
/// <summary>
/// Constructor which takes an IMaximumEntropyModel and an IContextGenerator;
/// calls the three-argument constructor with a default end-of-sentence scanner.
/// </summary>
/// <param name="model">The model used to evaluate end-of-sentence decisions.</param>
/// <param name="contextGenerator">Turns candidate positions into contexts for the model to evaluate.</param>
public MaximumEntropySentenceDetector(SharpEntropy.IMaximumEntropyModel model, SharpEntropy.IContextGenerator<Util.Pair<System.Text.StringBuilder, int>> contextGenerator) : this(model, contextGenerator, new DefaultEndOfSentenceScanner())
{
}
/// <summary>
/// Creates a chunker that decodes the given model with a beam search of the
/// requested width.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
/// <param name="contextGenerator">The context generator to be used by the specified model.</param>
/// <param name="beamSize">The size of the beam used when decoding sequences.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator, int beamSize)
{
    mModel = model;
    mBeam = new ChunkBeamSearch(this, beamSize, contextGenerator, model);
}
/// <summary>
/// Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
/// </summary>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="tagger">The model to assign pos-tags.</param>
/// <param name="chunker">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules for head word percolation.</param>
/// <param name="beamSize">The number of different parses kept during parsing.</param>
/// <param name="advancePercentage">
/// The minimal amount of probability mass which advanced outcomes must represent.
/// Only outcomes which contribute to the top "advancePercentage" will be explored.
/// </param>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    posTagger = tagger;
    basalChunker = chunker;
    this.buildModel = buildModel;
    this.checkModel = checkModel;
    // The single beam size bounds both m and k; q is the advance
    // probability-mass threshold described above.
    m = beamSize;
    k = beamSize;
    q = advancePercentage;
    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();
    this.headRules = headRules;
    startTypeMap = new Dictionary<string, string>();
    continueTypeMap = new Dictionary<string, string>();
    // Partition the build model's outcomes into "start" and "continue"
    // constituent types, mapping each outcome name to its bare label.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix))
        {
            //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
            startTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix))
        {
            //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
            continueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }
    // Cache frequently used outcome indexes.
    topStartIndex = buildModel.GetOutcomeIndex(MTopStart);
    completeIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    incompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}
/// <summary>
/// Creates a new name finder with the specified model and context generator.
/// </summary>
/// <param name="model">The model to be used to find names.</param>
/// <param name="contextGenerator">The context generator to be used with this name finder.</param>
/// <param name="beamSize">The size of the beam to be used in decoding this model.</param>
public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator, int beamSize)
{
    mModel = model;
    mContextGenerator = contextGenerator;
    // NOTE(review): beamSize is passed twice — once as the beam width and once
    // as the final argument, which the search forwards as its cache size;
    // confirm reusing the beam size as the cache size is intentional.
    mBeam = new NameBeamSearch(this, beamSize, contextGenerator, model, beamSize);
}
/// <summary>
/// Creates a chunker using the specified model and context generator and decodes the
/// model using a beam search of the specified size.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
/// <param name="contextGenerator">The context generator to be used by the specified model.</param>
/// <param name="beamSize">The size of the beam that should be used when decoding sequences.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator, int beamSize)
{
    Beam = new ChunkBeamSearch(this, beamSize, contextGenerator, model);
    Model = model;
}
/// <summary>
/// Creates a new name finder with the specified model and context generator,
/// using a default beam size of 10.
/// </summary>
/// <param name="model">The model to be used to find names.</param>
/// <param name="contextGenerator">The context generator to be used with this name finder.</param>
public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator) : this(model, contextGenerator, 10)
{
}
// Constructors ----------------------------------------------

/// <summary>
/// Creates a new name finder with the specified model, a default context
/// generator, and a default beam size of 10.
/// </summary>
/// <param name="model">The model to be used to find names.</param>
public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model) : this(model, new DefaultNameContextGenerator(10), 10)
{
}
// Constructors -------------------------

/// <summary>
/// Creates a new parser using the specified models and head rules, with the
/// default beam size and default advance percentage.
/// </summary>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="tagger">The model to assign pos-tags.</param>
/// <param name="chunker">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules for head word percolation.</param>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules) : this(buildModel, checkModel, tagger, chunker, headRules, DefaultBeamSize, DefaultAdvancePercentage)
{
}
/// <summary>
/// Creates a maximum-entropy-based resolver with the specified model name, using the
/// specified mode, which will look the specified number of entities back for a referent
/// and prefer the first referent if specified.
/// </summary>
/// <param name="modelDirectory">The name of the directory where the resolver models are stored.</param>
/// <param name="name">The name of the file where this model will be read or written.</param>
/// <param name="mode">The mode this resolver is being used in (training, testing).</param>
/// <param name="numberOfEntitiesBack">The number of entities back in the text that this resolver will look for a referent.</param>
/// <param name="preferFirstReferent">Set to true if the resolver should prefer the first referent which is more likely than non-reference. This only affects testing.</param>
/// <param name="nonReferentialResolver">Determines how likely it is that this entity is non-referential.</param>
protected MaximumEntropyResolver(string modelDirectory, string name, ResolverMode mode, int numberOfEntitiesBack, bool preferFirstReferent, INonReferentialResolver nonReferentialResolver) : base(numberOfEntitiesBack)
{
    mPreferFirstReferent = preferFirstReferent;
    mNonReferentialResolver = nonReferentialResolver;
    mResolverMode = mode;
    mModelName = modelDirectory + "/" + name;
    if (mResolverMode == ResolverMode.Test)
    {
        mModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(mModelName + mModelExtension));
        mSameIndex = mModel.GetOutcomeIndex(Same);
    }
    else if (mResolverMode == ResolverMode.Train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        // NOTE(review): an unknown mode is only reported to stderr, leaving the
        // resolver half-initialized; consider throwing ArgumentException like
        // the sibling resolver constructors do.
        System.Console.Error.WriteLine("Unknown mode: " + mResolverMode);
    }
    //add one for non-referent possibility
    mCandidateProbabilities = new double[GetNumberEntitiesBack() + 1];
}
/// <summary>
/// Creates a beam search bound to its owning chunker.
/// </summary>
/// <param name="maxentChunker">The chunker this search belongs to.</param>
/// <param name="size">Beam width.</param>
/// <param name="contextGenerator">Produces contexts for the model.</param>
/// <param name="model">Model assigning probabilities to chunk sequences.</param>
public ChunkBeamSearch(MaximumEntropyChunker maxentChunker, int size, IChunkerContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model) : base(size, contextGenerator, model)
{
    mMaxentChunker = maxentChunker;
}
/// <summary>
/// Creates a new name finder with the specified model and context generator.
/// </summary>
/// <param name="model">The model to be used to find names.</param>
/// <param name="contextGenerator">The context generator to be used with this name finder.</param>
/// <param name="beamSize">The size of the beam to be used in decoding this model.</param>
public MaximumEntropyNameFinder(SharpEntropy.IMaximumEntropyModel model, INameContextGenerator contextGenerator, int beamSize)
{
    mModel = model;
    mContextGenerator = contextGenerator;
    // NOTE(review): beamSize is passed twice — once as the beam width and once
    // as the final argument, which the search forwards as its cache size;
    // confirm reusing the beam size as the cache size is intentional.
    mBeam = new NameBeamSearch(this, beamSize, contextGenerator, model, beamSize);
}
/// <summary>
/// Creates a chunker using the specified model, a default context generator,
/// and a default beam size of 10.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model) : this(model, new DefaultChunkerContextGenerator(), 10)
{
}
// Constructors ------------------------

/// <summary>
/// Creates a POS tagger using the specified model and the default context generator.
/// </summary>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model) : this(model, new DefaultPosContextGenerator())
{
}
/// <summary>
/// Creates a chunker using the specified model and context generator, with a
/// default beam size of 10.
/// </summary>
/// <param name="model">The maximum entropy model for this chunker.</param>
/// <param name="contextGenerator">The context generator to be used by the specified model.</param>
public MaximumEntropyChunker(SharpEntropy.IMaximumEntropyModel model, IChunkerContextGenerator contextGenerator) : this(model, contextGenerator, 10)
{
}
/// <summary>
/// Creates a POS tagger with the default beam size.
/// </summary>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
/// <param name="contextGenerator">Produces the model's contextual predicates for each token.</param>
/// <param name="dictionary">Lookup list consulted when tagging known words.</param>
public MaximumEntropyPosTagger(SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary) : this(DefaultBeamSize, model, contextGenerator, dictionary)
{
}
// Constructors -----------------

/// <summary>Creates a new beam search decoder with caching disabled.</summary>
/// <param name="size">The size of the beam (k): candidate sequences kept per step.</param>
/// <param name="contextGenerator">The context generator for the model.</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
public BeamSearch(int size, IBeamSearchContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model) : this(size, contextGenerator, model, 0)
{
}
/// <summary>
/// Evaluates the tagger against a stream of annotated sentences, one sentence
/// per line, reporting per-tag accuracy and whole-sentence accuracy.
/// </summary>
/// <param name="posModel">The POS model to evaluate; replaces the tagger's current model.</param>
/// <param name="reader">Reader over the annotated evaluation data.</param>
/// <param name="accuracy">Fraction of individual tags predicted correctly.</param>
/// <param name="sentenceAccuracy">Fraction of sentences tagged entirely correctly.</param>
public virtual void LocalEvaluate(SharpEntropy.IMaximumEntropyModel posModel, System.IO.StreamReader reader, out double accuracy, out double sentenceAccuracy)
{
    mPosModel = posModel;
    float total = 0, correct = 0, sentences = 0, sentencesCorrect = 0;
    // NOTE(review): UTF-7 is obsolete and insecure in modern .NET (SYSLIB0001);
    // confirm the evaluation data really is UTF-7 before changing the encoding.
    System.IO.StreamReader sentenceReader = new System.IO.StreamReader(reader.BaseStream, System.Text.Encoding.UTF7);
    string line;
    while ((object) (line = sentenceReader.ReadLine()) != null)
    {
        sentences++;
        // Each line holds the words and their gold-standard tags.
        Util.Pair<ArrayList, ArrayList> annotatedPair = PosEventReader.ConvertAnnotatedString(line);
        ArrayList words = annotatedPair.FirstValue;
        ArrayList outcomes = annotatedPair.SecondValue;
        // Decode the best tag sequence for the sentence with the beam search.
        ArrayList tags = new ArrayList(Beam.BestSequence(words, null).Outcomes);
        int count = 0;
        bool isSentenceOK = true;
        for (System.Collections.IEnumerator tagIndex = tags.GetEnumerator(); tagIndex.MoveNext(); count++)
        {
            total++;
            string tag = (string) tagIndex.Current;
            if (tag == (string)outcomes[count])
            {
                correct++;
            }
            else
            {
                isSentenceOK = false;
            }
        }
        if (isSentenceOK)
        {
            sentencesCorrect++;
        }
    }
    // NOTE(review): if the input contains no sentences these divisions yield NaN.
    accuracy = correct / total;
    sentenceAccuracy = sentencesCorrect / sentences;
}
/// <summary>
/// Builds a POS tagger that decodes the given maximum entropy model with a beam search.
/// </summary>
/// <param name="beamSize">Width of the beam used when searching for the best tag sequence.</param>
/// <param name="model">The maximum entropy model that scores candidate tags.</param>
/// <param name="contextGenerator">Produces the model's contextual predicates for each token.</param>
/// <param name="dictionary">Lookup list consulted when tagging known words.</param>
public MaximumEntropyPosTagger(int beamSize, SharpEntropy.IMaximumEntropyModel model, IPosContextGenerator contextGenerator, PosLookupList dictionary)
{
    mBeamSize = beamSize;
    mPosModel = model;
    mContextGenerator = contextGenerator;
    Beam = new PosBeamSearch(this, mBeamSize, contextGenerator, model);
    mDictionary = dictionary;
}
/// <summary>
/// Creates a beam search of the specified size using the specified model with the specified context generator.
/// </summary>
/// <param name="nameFinder">The associated MaximumEntropyNameFinder instance.</param>
/// <param name="size">The size of the beam.</param>
/// <param name="contextGenerator">The context generator used with the specified model.</param>
/// <param name="model">The model used to determine names.</param>
/// <param name="beamSize">
/// Forwarded as the final argument of the base constructor; NOTE(review): the
/// base search treats that argument as its cache size, not a beam width —
/// confirm the naming and intent.
/// </param>
public NameBeamSearch(MaximumEntropyNameFinder nameFinder, int size, INameContextGenerator contextGenerator, SharpEntropy.IMaximumEntropyModel model, int beamSize) : base(size, contextGenerator, model, beamSize)
{
    _nameFinder = nameFinder;
}
/// <summary>
/// Creates a new parser using the specified models and head rules using the specified beam size and advance percentage.
/// </summary>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="tagger">The model to assign pos-tags.</param>
/// <param name="chunker">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules for head word percolation.</param>
/// <param name="beamSize">The number of different parses kept during parsing.</param>
/// <param name="advancePercentage">
/// The minimal amount of probability mass which advanced outcomes must represent.
/// Only outcomes which contribute to the top "advancePercentage" will be explored.
/// </param>
public MaximumEntropyParser(SharpEntropy.IMaximumEntropyModel buildModel, SharpEntropy.IMaximumEntropyModel checkModel, IParserTagger tagger, IParserChunker chunker, IHeadRules headRules, int beamSize, double advancePercentage)
{
    mPosTagger = tagger;
    mBasalChunker = chunker;
    mBuildModel = buildModel;
    mCheckModel = checkModel;
    // The single beam size bounds both M and K; Q is the advance
    // probability-mass threshold described above.
    M = beamSize;
    K = beamSize;
    Q = advancePercentage;
    // Reusable probability buffers, one slot per model outcome.
    mBuildProbabilities = new double[mBuildModel.OutcomeCount];
    mCheckProbabilities = new double[mCheckModel.OutcomeCount];
    mBuildContextGenerator = new BuildContextGenerator();
    mCheckContextGenerator = new CheckContextGenerator();
    mHeadRules = headRules;
    // Heaps reused across parses to hold in-progress and completed derivations.
    mOldDerivationsHeap = new Util.TreeSet<Parse>();
    mNewDerivationsHeap = new Util.TreeSet<Parse>();
    mParses = new Util.TreeSet<Parse>();
    mStartTypeMap = new Dictionary<string, string>();
    mContinueTypeMap = new Dictionary<string, string>();
    // Partition the build model's outcomes into "start" and "continue"
    // constituent types, mapping each outcome name to its bare label.
    for (int buildOutcomeIndex = 0, buildOutcomeCount = buildModel.OutcomeCount; buildOutcomeIndex < buildOutcomeCount; buildOutcomeIndex++)
    {
        string outcome = buildModel.GetOutcomeName(buildOutcomeIndex);
        if (outcome.StartsWith(StartPrefix))
        {
            //System.Console.Error.WriteLine("startMap " + outcome + "->" + outcome.Substring(StartPrefix.Length));
            mStartTypeMap.Add(outcome, outcome.Substring(StartPrefix.Length));
        }
        else if (outcome.StartsWith(ContinuePrefix))
        {
            //System.Console.Error.WriteLine("contMap " + outcome + "->" + outcome.Substring(ContinuePrefix.Length));
            mContinueTypeMap.Add(outcome, outcome.Substring(ContinuePrefix.Length));
        }
    }
    // Cache frequently used outcome indexes.
    mTopStartIndex = buildModel.GetOutcomeIndex(mTopStart);
    mCompleteIndex = checkModel.GetOutcomeIndex(CompleteOutcome);
    mIncompleteIndex = checkModel.GetOutcomeIndex(IncompleteOutcome);
}