/// <summary>
/// Evaluates the context of <paramref name="ev"/> against <paramref name="model"/> and asserts that the model
/// yields exactly two outcomes, that the best outcome equals the event outcome, and that each outcome
/// probability matches the expected value within a tolerance of 0.0001.
/// </summary>
/// <param name="model">The model under test.</param>
/// <param name="ev">The event providing the context to evaluate and the expected outcome.</param>
/// <param name="higherProbability">
/// The probability expected for the outcome equal to the event outcome; any other outcome is expected
/// to have <c>1.0 - higherProbability</c>.
/// </param>
internal static void TestModel(IMaxentModel model, Event ev, double higherProbability) {
    const double tolerance = 0.0001;

    var probabilities = model.Eval(ev.Context);
    var predicted = model.GetBestOutcome(probabilities);

    Assert.AreEqual(2, probabilities.Length);
    Assert.AreEqual(ev.Outcome, predicted);

    // Check both outcome slots in index order: the slot matching the event outcome carries the
    // higher probability, the other slot carries its complement.
    for (var index = 0; index < 2; index++) {
        var expected = ev.Outcome.Equals(model.GetOutcome(index))
            ? higherProbability
            : 1.0 - higherProbability;

        Assert.AreEqual(expected, probabilities[index], tolerance);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> class that works with separate build, attach
/// and check models.
/// </summary>
/// <param name="buildModel">The build model; also used to resolve the index of the <c>DONE</c> outcome.</param>
/// <param name="attachModel">The attach model; also used to resolve the sister and daughter attach outcome indexes.</param>
/// <param name="checkModel">The check model; also used to resolve the index of the <c>COMPLETE</c> outcome.</param>
/// <param name="tagger">The tagger forwarded to the base constructor.</param>
/// <param name="chunker">The chunker forwarded to the base constructor.</param>
/// <param name="headRules">The head rules forwarded to the base constructor.</param>
/// <param name="beamSize">The beam size forwarded to the base constructor.</param>
/// <param name="advancePercentage">The advance percentage forwarded to the base constructor.</param>
private Parser(
    IMaxentModel buildModel,
    IMaxentModel attachModel,
    IMaxentModel checkModel,
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage) {

    // Build model: context generator, probability buffer and the DONE outcome index.
    this.buildModel = buildModel;
    buildContextGenerator = new BuildContextGenerator();
    bProbs = new double[buildModel.GetNumOutcomes()];
    doneIndex = buildModel.GetIndex(DONE);

    // Attach model: context generator, probability buffer and the attach outcome indexes.
    this.attachModel = attachModel;
    attachContextGenerator = new AttachContextGenerator(punctSet);
    aProbs = new double[attachModel.GetNumOutcomes()];
    sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
    daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
    // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
    attachments = new[] { daughterAttachIndex, sisterAttachIndex };

    // Check model: context generator, probability buffer and the COMPLETE outcome index.
    this.checkModel = checkModel;
    checkContextGenerator = new CheckContextGenerator(punctSet);
    cProbs = new double[checkModel.GetNumOutcomes()];
    completeIndex = checkModel.GetIndex(COMPLETE);
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME"/> class using the given sentence model.
/// </summary>
/// <param name="sentenceModel">
/// The sentence model that provides the maxent model, the factory used to obtain the context generator
/// and the end-of-sentence scanner, and the token-end setting.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel) {
    // Fail with a descriptive argument exception instead of a NullReferenceException on first use.
    if (sentenceModel == null) {
        throw new System.ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;
}
/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> class that works with a build model and a check model.
/// </summary>
/// <param name="buildModel">
/// The build model. Its outcomes are scanned to fill the start and continue type maps, and it is used to
/// resolve the index of the <c>TOP_START</c> outcome.
/// </param>
/// <param name="checkModel">The check model; used to resolve the <c>COMPLETE</c> and <c>INCOMPLETE</c> outcome indexes.</param>
/// <param name="tagger">The tagger forwarded to the base constructor.</param>
/// <param name="chunker">The chunker forwarded to the base constructor.</param>
/// <param name="headRules">The head rules forwarded to the base constructor.</param>
/// <param name="beamSize">The beam size forwarded to the base constructor.</param>
/// <param name="advancePercentage">The advance percentage forwarded to the base constructor.</param>
private Parser(
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage) {

    this.buildModel = buildModel;
    this.checkModel = checkModel;

    var buildOutcomeCount = buildModel.GetNumOutcomes();

    bProbs = new double[buildOutcomeCount];
    cProbs = new double[checkModel.GetNumOutcomes()];

    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();

    startTypeMap = new Dictionary<string, string>();
    contTypeMap = new Dictionary<string, string>();

    // Map every start/continue outcome to the text that follows its prefix.
    for (var index = 0; index < buildOutcomeCount; index++) {
        var outcome = buildModel.GetOutcome(index);

        // Outcome labels are identifiers, not linguistic text: compare ordinally so the
        // classification does not depend on the current culture.
        if (outcome.StartsWith(START, System.StringComparison.Ordinal)) {
            startTypeMap[outcome] = outcome.Substring(START.Length);
        } else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal)) {
            contTypeMap[outcome] = outcome.Substring(CONT.Length);
        }
    }

    topStartIndex = buildModel.GetIndex(TOP_START);
    completeIndex = checkModel.GetIndex(COMPLETE);
    incompleteIndex = checkModel.GetIndex(INCOMPLETE);
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel) {
    if (sentenceModel == null) {
        throw new ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;

    // Without an abbreviation dictionary there is nothing else to prepare.
    if (sentenceModel.Abbreviations == null) {
        return;
    }

    stringComparison = sentenceModel.Abbreviations.IsCaseSensitive
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    abbreviationTokens = new Dictionary<string, int>();
    foreach (var abbreviation in sentenceModel.Abbreviations) {
        foreach (var token in abbreviation.Tokens) {
            // Use the indexer rather than Add: the same token may occur in more than one
            // abbreviation, and Add would throw an ArgumentException on the duplicate key.
            abbreviationTokens[token] = token.Length;
        }
    }
}
/// <summary>
/// Measures the accuracy of <paramref name="model"/> on the events read from the "devset" file and asserts
/// that it matches the expected accuracy within a tolerance of 0.00001.
/// </summary>
/// <param name="model">The model under test.</param>
/// <param name="expecedAccuracy">The expected ratio of correctly predicted events.</param>
internal static void TestModel(IMaxentModel model, double expecedAccuracy) {
    var hits = 0;
    var seen = 0;

    foreach (var sample in readPpaFile("devset")) {
        var scores = model.Eval(sample.Context);

        // Index of the highest score; on ties the earliest index wins.
        var top = 0;
        for (var k = 1; k < scores.Length; k++) {
            if (scores[k] > scores[top]) {
                top = k;
            }
        }

        if (sample.Outcome.Equals(model.GetOutcome(top))) {
            hits++;
        }

        seen++;
    }

    var accuracy = hits / (double) seen;

    Console.Out.WriteLine("Accuracy on PPA devSet: (" + hits + "/" + seen + ") " + accuracy);

    Assert.AreEqual(expecedAccuracy, accuracy, .00001);
}
/// <summary>
/// Trains a name finder model with the given parameters.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters; also supplies the beam size.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the
/// training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
/// <exception cref="System.InvalidOperationException">The trainer type is not one of the handled types.</exception>
public static TokenNameFinderModel Train(
    string languageCode,
    string type,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory,
    Monitor monitor) {

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var manifestInfoEntries = new Dictionary<string, string>();

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel eventModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    switch (TrainerFactory.GetTrainerType(parameters)) {
        case TrainerType.EventModelTrainer: {
            var events = new NameFinderEventStream(
                samples,
                type,
                factory.CreateContextGenerator(),
                factory.CreateSequenceCodec());

            eventModel = TrainerFactory
                .GetEventTrainer(parameters, manifestInfoEntries, monitor)
                .Train(events);
            break;
        }
        case TrainerType.EventModelSequenceTrainer: {
            var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

            eventModel = TrainerFactory
                .GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor)
                .Train(sequences);
            break;
        }
        case TrainerType.SequenceTrainer: {
            var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

            sequenceModel = TrainerFactory
                .GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor)
                .Train(sequences);
            break;
        }
        default:
            throw new InvalidOperationException("Unexpected trainer type!");
    }

    // The event model path is the one that carries the beam size.
    if (sequenceModel == null) {
        return new TokenNameFinderModel(
            languageCode,
            eventModel,
            beamSize,
            factory.FeatureGenerator,
            factory.Resources,
            manifestInfoEntries,
            factory.SequenceCodec);
    }

    return new TokenNameFinderModel(
        languageCode,
        sequenceModel,
        factory.FeatureGenerator,
        factory.Resources,
        manifestInfoEntries,
        factory.SequenceCodec);
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> class without manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The build model.</param>
/// <param name="checkModel">The check model.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The part-of-speech model used by the parser.</param>
/// <param name="chunkerTagger">The chunker model used by the parser.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType)
    : this(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        parserTagger,
        chunkerTagger,
        headRules,
        modelType,
        manifestInfoEntries: null) {
    // All work is delegated to the constructor that also accepts manifest information entries.
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> class without an attach model.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The build model.</param>
/// <param name="checkModel">The check model.</param>
/// <param name="parserTagger">The part-of-speech model used by the parser.</param>
/// <param name="chunkerTagger">The chunker model used by the parser.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="type">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType type,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        buildModel,
        checkModel,
        null, // no attach model
        parserTagger,
        chunkerTagger,
        headRules,
        type,
        manifestInfoEntries) {
    // All work is delegated to the constructor that also accepts an attach model.
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderModel"/> class without a generator descriptor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="nameFinderModel">The name finder model.</param>
/// <param name="resources">The resources.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public TokenNameFinderModel(
    string languageCode,
    IMaxentModel nameFinderModel,
    Dictionary<string, object> resources,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        nameFinderModel,
        null, // no generator descriptor
        resources,
        manifestInfoEntries) {
    // All work is delegated to the overload that takes the extra argument.
}
/// <summary>
/// Initializes a new instance of the <see cref="POSModel"/> class using
/// <see cref="POSTaggerME.DefaultBeamSize"/> as the beam size.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="posModel">The part-of-speech maxent model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="posFactory">The part-of-speech tagger factory.</param>
public POSModel(
    string languageCode,
    IMaxentModel posModel,
    Dictionary<string, string> manifestInfoEntries,
    POSTaggerFactory posFactory)
    : this(
        languageCode,
        posModel,
        POSTaggerME.DefaultBeamSize,
        manifestInfoEntries,
        posFactory) {
    // All work is delegated to the constructor that takes an explicit beam size.
}
/// <summary>
/// Initializes a new instance of the <see cref="LemmatizerModel"/> class with a specified
/// <paramref name="beamSize"/> value.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="lemmatizerModel">The lemmatizer model stored in the artifact map.</param>
/// <param name="beamSize">The beam size recorded in the manifest.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="factory">The lemmatizer factory.</param>
public LemmatizerModel(
    string languageCode,
    IMaxentModel lemmatizerModel,
    int beamSize,
    Dictionary<string, string> manifestInfoEntries,
    LemmatizerFactory factory)
    : base(ComponentName, languageCode, manifestInfoEntries, factory) {

    // The beam size is written culture-independently.
    var beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);

    artifactMap.Add(ModelEntry, lemmatizerModel);
    Manifest[Parameters.BeamSize] = beamSizeText;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderModel"/> class using
/// <see cref="NameFinderME.DefaultBeamSize"/> as the beam size and a new <see cref="BioCodec"/> as the
/// sequence codec.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="nameFinderModel">The name finder model.</param>
/// <param name="generatorDescriptor">The generator descriptor.</param>
/// <param name="resources">The resources.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public TokenNameFinderModel(
    string languageCode,
    IMaxentModel nameFinderModel,
    byte[] generatorDescriptor,
    Dictionary<string, object> resources,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        nameFinderModel,
        NameFinderME.DefaultBeamSize,
        generatorDescriptor,
        resources,
        manifestInfoEntries,
        new BioCodec()) {
    // All work is delegated to the constructor that takes an explicit beam size and codec.
}
/// <summary>
/// Initializes a new instance of the <see cref="ChunkerModel"/> class with a specified
/// <paramref name="beamSize"/> value.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="chunkerModel">The chunker model stored in the artifact map.</param>
/// <param name="beamSize">The beam size recorded in the manifest.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="factory">The chunker factory.</param>
public ChunkerModel(
    string languageCode,
    IMaxentModel chunkerModel,
    int beamSize,
    Dictionary<string, string> manifestInfoEntries,
    ChunkerFactory factory)
    : base(ComponentName, languageCode, manifestInfoEntries, factory) {

    // The beam size is written culture-independently.
    var beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);

    artifactMap[ChunkerEntry] = chunkerModel;
    Manifest[Parameters.BeamSize] = beamSizeText;

    CheckArtifactMap();
}
/// <summary>
/// Determines whether the outcomes of the given model are compatible with the sequence codec created by
/// the factory.
/// </summary>
/// <param name="model">The model whose outcomes are checked.</param>
/// <returns>The result of the codec's outcome compatibility check for all outcomes of the model.</returns>
private bool IsModelValid(IMaxentModel model) {
    var count = model.GetNumOutcomes();
    var names = new string[count];

    // Collect every outcome name in index order.
    for (var index = 0; index < count; index++) {
        names[index] = model.GetOutcome(index);
    }

    var codec = Factory.CreateSequenceCodec();
    return codec.AreOutcomesCompatible(names);
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenizerME"/> class.
/// </summary>
/// <param name="model">
/// The tokenizer model that provides the maxent model, the factory and the alphanumeric optimization flag.
/// </param>
public TokenizerME(TokenizerModel model) {
    var tokenizerFactory = model.Factory;

    // Settings taken directly from the tokenizer model.
    this.model = model.MaxentModel;
    useAlphaNumericOptimization = model.UseAlphaNumericOptimization;

    // Settings taken from the factory of the model.
    cg = tokenizerFactory.ContextGenerator;
    alphanumeric = new Regex(tokenizerFactory.AlphaNumericPattern, RegexOptions.Compiled);

    // Working buffers.
    newTokens = new List<Span>();
    tokProbs = new List<double>(50);
}
/// <summary>
/// Creates a new search object with the specified cache size.
/// </summary>
/// <param name="size">The size of the beam (k).</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
/// <param name="cacheSize">Size of the cache; a contexts cache is only created when this value is positive.</param>
public BeamSearch(int size, IMaxentModel model, int cacheSize) {
    this.size = size;
    this.model = model;

    // One probability slot per model outcome.
    probs = new double[model.GetNumOutcomes()];

    if (cacheSize > 0) {
        contextsCache = new Cache(cacheSize);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenizerME"/> class.
/// </summary>
/// <param name="model">The tokenizer model to read the maxent model and the tokenizer settings from.</param>
public TokenizerME(TokenizerModel model) {
    this.model = model.MaxentModel;
    useAlphaNumericOptimization = model.UseAlphaNumericOptimization;

    var source = model.Factory;
    var pattern = source.AlphaNumericPattern;

    // The alphanumeric pattern is compiled once per tokenizer instance.
    alphanumeric = new Regex(pattern, RegexOptions.Compiled);
    cg = source.ContextGenerator;

    newTokens = new List<Span>();
    tokProbs = new List<double>(50);
}
/// <summary>
/// Checks whether the outcomes of the given model match the expected outcomes.
/// </summary>
/// <param name="model">The model.</param>
/// <param name="expectedOutcomes">The expected outcomes.</param>
/// <returns>
/// <c>true</c> if the model has as many outcomes as <paramref name="expectedOutcomes"/> has entries and
/// every outcome of the model is contained in <paramref name="expectedOutcomes"/>; <c>false</c> otherwise.
/// </returns>
public static bool ValidateOutcomes(IMaxentModel model, params string[] expectedOutcomes) {
    var outcomeCount = model.GetNumOutcomes();

    // A different number of outcomes can never match.
    if (outcomeCount != expectedOutcomes.Length) {
        return false;
    }

    for (var index = 0; index < outcomeCount; index++) {
        var outcome = model.GetOutcome(index);
        if (!expectedOutcomes.Contains(outcome)) {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Creates a new search object.
/// </summary>
/// <param name="size">The size of the beam (k).</param>
/// <param name="cg">The context generator for the model.</param>
/// <param name="model">The model for assigning probabilities to the sequence outcomes.</param>
/// <param name="validator">The sequence validator.</param>
/// <param name="cacheSize">Size of the cache; a contexts cache is only created when this value is positive.</param>
public BeamSearch(
    int size,
    IBeamSearchContextGenerator<T> cg,
    IMaxentModel model,
    ISequenceValidator<T> validator,
    int cacheSize) {

    // Collaborators and beam width.
    this.size = size;
    this.cg = cg;
    this.validator = validator;
    this.model = model;

    // Optional contexts cache.
    if (cacheSize > 0) {
        contextsCache = new Cache(cacheSize);
    }

    // One probability slot per model outcome.
    var outcomeCount = model.GetNumOutcomes();
    probs = new double[outcomeCount];
}
/// <summary>
/// Trains a lemmatizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters; also supplies the beam size.</param>
/// <param name="factory">The lemmatizer factory; supplies the context generator.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the
/// training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="LemmatizerModel" /> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static LemmatizerModel Train(
    string languageCode,
    IObjectStream<LemmaSample> samples,
    TrainingParameters parameters,
    LemmatizerFactory factory,
    Monitor monitor) {

    var manifestInfoEntries = new Dictionary<string, string>();
    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var contextGenerator = factory.GetContextGenerator();

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel eventModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer: {
            var events = new LemmaSampleEventStream(samples, contextGenerator);

            eventModel = TrainerFactory
                .GetEventTrainer(parameters, manifestInfoEntries, monitor)
                .Train(events);
            break;
        }
        case TrainerType.EventModelSequenceTrainer: {
            var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);

            eventModel = TrainerFactory
                .GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor)
                .Train(sequences);
            break;
        }
        case TrainerType.SequenceTrainer: {
            var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);

            sequenceModel = TrainerFactory
                .GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor)
                .Train(sequences);
            break;
        }
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    // Only the event model path carries the beam size.
    if (eventModel == null) {
        return new LemmatizerModel(languageCode, sequenceModel, manifestInfoEntries, factory);
    }

    return new LemmatizerModel(languageCode, eventModel, beamSize, manifestInfoEntries, factory);
}
/// <summary>
/// Initializes a new instance of the <see cref="POSModel"/> class with a specified
/// <paramref name="beamSize"/> value.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="posModel">The part-of-speech maxent model stored in the artifact map.</param>
/// <param name="beamSize">The beam size recorded in the manifest.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="posFactory">The part-of-speech tagger factory.</param>
/// <exception cref="System.InvalidOperationException">The <paramref name="posModel"/> is <c>null</c>.</exception>
public POSModel(
    string languageCode,
    IMaxentModel posModel,
    int beamSize,
    Dictionary<string, string> manifestInfoEntries,
    POSTaggerFactory posFactory)
    : base(ComponentName, languageCode, manifestInfoEntries, posFactory) {

    if (posModel == null) {
        throw new InvalidOperationException("The maxentPosModel param must not be null!");
    }

    // The beam size is written culture-independently.
    var beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);
    Manifest[Parameters.BeamSize] = beamSizeText;

    artifactMap[EntryName] = posModel;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="POSModel"/> class with a specified
/// <paramref name="beamSize"/> value.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="posModel">The part-of-speech maxent model stored in the artifact map.</param>
/// <param name="beamSize">The beam size recorded in the manifest.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="posFactory">The part-of-speech tagger factory.</param>
/// <exception cref="System.InvalidOperationException">The <paramref name="posModel"/> is <c>null</c>.</exception>
public POSModel(
    string languageCode,
    IMaxentModel posModel,
    int beamSize,
    Dictionary<string, string> manifestInfoEntries,
    POSTaggerFactory posFactory)
    : base(ComponentName, languageCode, manifestInfoEntries, posFactory) {

    if (posModel == null) {
        throw new InvalidOperationException("The maxentPosModel param must not be null!");
    }

    // Record the requested beam size instead of silently discarding the argument.
    // Invariant culture keeps the stored value independent of the current culture.
    Manifest[Parameters.BeamSize] = beamSize.ToString(System.Globalization.CultureInfo.InvariantCulture);

    artifactMap[EntryName] = posModel;

    CheckArtifactMap();
}
/// <summary>
/// Trains a Part of Speech model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters; also supplies the beam size.</param>
/// <param name="factory">The part-of-speech tagger factory; supplies the context generator.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the
/// training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(
    string languageCode,
    IObjectStream<POSSample> samples,
    TrainingParameters parameters,
    POSTaggerFactory factory,
    Monitor monitor) {

    // Honor the beam size configured in the training parameters; previously it was never read,
    // so event models were always created with the default beam size.
    var beamSize = parameters.Get(Parameters.BeamSize, POSTaggerME.DefaultBeamSize);

    var contextGenerator = factory.GetPOSContextGenerator();
    var manifestInfoEntries = new Dictionary<string, string>();

    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel posModel = null;
    ML.Model.ISequenceClassificationModel<string> seqPosModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            var es = new POSSampleEventStream(samples, contextGenerator);
            var trainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

            posModel = trainer.Train(es);
            break;
        case TrainerType.EventModelSequenceTrainer:
            var ss = new POSSampleSequenceStream(samples, contextGenerator);
            var trainer2 = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);

            posModel = trainer2.Train(ss);
            break;
        case TrainerType.SequenceTrainer:
            var trainer3 = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var ss2 = new POSSampleSequenceStream(samples, contextGenerator);

            seqPosModel = trainer3.Train(ss2);
            break;
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    if (posModel != null) {
        return new POSModel(languageCode, posModel, beamSize, manifestInfoEntries, factory);
    }

    return new POSModel(languageCode, seqPosModel, manifestInfoEntries, factory);
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// The <paramref name="modelType"/> is <see cref="Parser.ParserType.Chunking"/> and the
/// <paramref name="attachModel"/> is not <c>null</c>,
/// or
/// the <paramref name="modelType"/> is <see cref="Parser.ParserType.TreeInsert"/> and the
/// <paramref name="attachModel"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries) {

    // Validate the attach model against the parser type and pick the manifest value.
    string parserTypeValue;
    switch (modelType) {
        case ParserType.Chunking:
            if (attachModel != null) {
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }

            parserTypeValue = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            if (attachModel == null) {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }

            parserTypeValue = "TREEINSERT";
            artifactMap[EntryAttachModel] = attachModel;
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }

    Manifest[ParserTypeParameter] = parserTypeValue;

    // Artifacts shared by both parser types.
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The build model.</param>
/// <param name="checkModel">The check model.</param>
/// <param name="attachModel">
/// The attach model; must be <c>null</c> for a chunking parser and not <c>null</c> for a treeinsert parser.
/// </param>
/// <param name="parserTagger">The part-of-speech model used by the parser.</param>
/// <param name="chunkerTagger">The chunker model used by the parser.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// The <paramref name="attachModel"/> does not fit the <paramref name="modelType"/>,
/// or
/// the <paramref name="modelType"/> value is unknown.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries) {

    switch (modelType) {
        case ParserType.Chunking:
            if (attachModel != null) {
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }

            Manifest[PARSER_TYPE] = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            if (attachModel == null) {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }

            Manifest[PARSER_TYPE] = "TREEINSERT";
            artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;
            break;
        default:
            // The exception type is kept as ArgumentException for existing callers.
            throw new ArgumentException(@"Unknown model type.", nameof(modelType));
    }

    // Artifacts shared by both parser types.
    artifactMap[BUILD_MODEL_ENTRY_NAME] = buildModel;
    artifactMap[CHECK_MODEL_ENTRY_NAME] = checkModel;
    artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME] = parserTagger;
    artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
    artifactMap[HEAD_RULES_MODEL_ENTRY_NAME] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Trains a chunker model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters; also supplies the beam size.</param>
/// <param name="factory">The chunker factory; supplies the context generator.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the
/// training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ChunkerModel Train(
    string languageCode,
    IObjectStream<ChunkSample> samples,
    TrainingParameters parameters,
    ChunkerFactory factory,
    Monitor monitor) {

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    var manifestInfoEntries = new Dictionary<string, string>();

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel eventModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    switch (trainerType) {
        case TrainerType.SequenceTrainer: {
            var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var sequences = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

            sequenceModel = sequenceTrainer.Train(sequences);
            break;
        }
        case TrainerType.EventModelTrainer: {
            var events = new ChunkerEventStream(samples, factory.GetContextGenerator());
            var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

            eventModel = eventTrainer.Train(events);
            break;
        }
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

    // Only the event model path carries the beam size.
    if (eventModel == null) {
        return new ChunkerModel(languageCode, sequenceModel, manifestInfoEntries, factory);
    }

    return new ChunkerModel(languageCode, eventModel, beamSize, manifestInfoEntries, factory);
}
/// <summary>
/// Gets the model mapped to the given domain.
/// </summary>
/// <param name="domain">The domain object which keys to the desired model.</param>
/// <returns>The <see cref="Model.IMaxentModel"/> corresponding to the given domain.</returns>
/// <exception cref="KeyNotFoundException">No model has been created for the given domain.</exception>
public IMaxentModel GetModel(IModelDomain domain) {
    IMaxentModel result;
    bool found;

    _lock.EnterReadLock();
    try {
        // Single lookup instead of ContainsKey followed by the indexer.
        found = _map.TryGetValue(domain, out result);
    } finally {
        // Always release the read lock, even when the lookup throws; otherwise the lock
        // would stay held.
        _lock.ExitReadLock();
    }

    if (!found) {
        throw new KeyNotFoundException(String.Format("No model has been created for domain: {0}", domain));
    }

    return result;
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderModel" /> class.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="nameFinderModel">The name finder model.</param>
/// <param name="beamSize">The beam size recorded in the manifest.</param>
/// <param name="generatorDescriptor">The generator descriptor.</param>
/// <param name="resources">The resources.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="sequenceCodec">The sequence codec.</param>
/// <exception cref="System.InvalidOperationException">Model not compatible with name finder!</exception>
public TokenNameFinderModel(
    string languageCode,
    IMaxentModel nameFinderModel,
    int beamSize,
    byte[] generatorDescriptor,
    Dictionary<string, object> resources,
    Dictionary<string, string> manifestInfoEntries,
    ISequenceCodec<string> sequenceCodec)
    : base(ComponentName, languageCode, manifestInfoEntries) {

    // The beam size is written culture-independently.
    var beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);
    Manifest[Parameters.BeamSize] = beamSizeText;

    Init(nameFinderModel, generatorDescriptor, resources, sequenceCodec);

    // The validity check runs after initialization, as in the original statement order.
    if (IsModelValid(nameFinderModel)) {
        return;
    }

    throw new InvalidOperationException("Model not compatible with name finder!");
}
/// <summary>
/// Checks whether the given model has exactly the expected outcomes.
/// </summary>
/// <param name="model">The model.</param>
/// <param name="expectedOutcomes">The expected outcomes.</param>
/// <returns>
/// <c>true</c> if the number of model outcomes equals the number of expected outcomes and each model
/// outcome is one of the expected outcomes; <c>false</c> otherwise.
/// </returns>
public static bool ValidateOutcomes(IMaxentModel model, params string[] expectedOutcomes) {
    var total = model.GetNumOutcomes();

    // Start from the size comparison; the loop stops as soon as one outcome is not expected.
    var valid = total == expectedOutcomes.Length;

    for (var position = 0; valid && position < total; position++) {
        valid = expectedOutcomes.Contains(model.GetOutcome(position));
    }

    return valid;
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries) {

    // Parameter names are passed with nameof so they stay correct if a parameter is renamed.
    switch (modelType) {
        case ParserType.Chunking:
            if (attachModel != null) {
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }

            Manifest[ParserTypeParameter] = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            if (attachModel == null) {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }

            Manifest[ParserTypeParameter] = "TREEINSERT";
            artifactMap[EntryAttachModel] = attachModel;
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }

    // Artifacts shared by both parser types.
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceDetectorME" /> using the given sentence model.
/// </summary>
/// <param name="sentenceModel">The sentence model.</param>
/// <exception cref="System.ArgumentNullException">
/// The <paramref name="sentenceModel"/> is <c>null</c>.
/// </exception>
public SentenceDetectorME(SentenceModel sentenceModel) {
    if (sentenceModel == null) {
        throw new ArgumentNullException(nameof(sentenceModel));
    }

    model = sentenceModel.MaxentModel;
    cgen = sentenceModel.Factory.GetContextGenerator();
    scanner = sentenceModel.Factory.GetEndOfSentenceScanner();
    useTokenEnd = sentenceModel.UseTokenEnd;

    var abbreviations = sentenceModel.Abbreviations;
    if (abbreviations == null) {
        // No abbreviation dictionary: the abbreviation fields are left untouched.
        return;
    }

    stringComparison = abbreviations.IsCaseSensitive
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    abbreviationTokens = new Dictionary<string, int>();
    foreach (var abbreviation in abbreviations) {
        foreach (var token in abbreviation.Tokens) {
            // The indexer tolerates a token that appears in several abbreviations, whereas
            // Add would throw an ArgumentException on the repeated key.
            abbreviationTokens[token] = token.Length;
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> class that works with a build model and a check model.
/// </summary>
/// <param name="buildModel">
/// The build model. Its outcomes fill the start and continue type maps, and it resolves the index of the
/// <c>TOP_START</c> outcome.
/// </param>
/// <param name="checkModel">The check model; resolves the <c>COMPLETE</c> and <c>INCOMPLETE</c> outcome indexes.</param>
/// <param name="tagger">The tagger forwarded to the base constructor.</param>
/// <param name="chunker">The chunker forwarded to the base constructor.</param>
/// <param name="headRules">The head rules forwarded to the base constructor.</param>
/// <param name="beamSize">The beam size forwarded to the base constructor.</param>
/// <param name="advancePercentage">The advance percentage forwarded to the base constructor.</param>
private Parser(
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage) {

    this.buildModel = buildModel;
    this.checkModel = checkModel;

    // The outcome count is used for both the probability buffer and the scan below.
    var outcomeCount = buildModel.GetNumOutcomes();

    bProbs = new double[outcomeCount];
    cProbs = new double[checkModel.GetNumOutcomes()];

    buildContextGenerator = new BuildContextGenerator();
    checkContextGenerator = new CheckContextGenerator();

    startTypeMap = new Dictionary<string, string>();
    contTypeMap = new Dictionary<string, string>();

    for (var position = 0; position < outcomeCount; position++) {
        var outcome = buildModel.GetOutcome(position);

        // Prefix checks use ordinal comparison: the culture-sensitive default overload can
        // give different results depending on the current culture.
        if (outcome.StartsWith(START, System.StringComparison.Ordinal)) {
            startTypeMap[outcome] = outcome.Substring(START.Length);
        } else if (outcome.StartsWith(CONT, System.StringComparison.Ordinal)) {
            contTypeMap[outcome] = outcome.Substring(CONT.Length);
        }
    }

    topStartIndex = buildModel.GetIndex(TOP_START);
    completeIndex = checkModel.GetIndex(COMPLETE);
    incompleteIndex = checkModel.GetIndex(INCOMPLETE);
}
/// <summary>
/// Evaluates the model on the events read from the "devset" file and asserts that the
/// fraction of correctly predicted outcomes equals the expected accuracy.
/// </summary>
/// <param name="model">The model to evaluate.</param>
/// <param name="expectedAccuracy">The expected accuracy; compared with a tolerance of 0.00001.</param>
private static void TestModel(IMaxentModel model, double expectedAccuracy) {
    var hits = 0;
    var seen = 0;

    foreach (var ev in PrepAttachDataUtility.ReadPpaFile(@"devset")) {
        var expected = ev.Outcome;
        var scores = model.Eval(ev.Context);

        // Arg-max over the outcome scores; on ties the lowest index wins.
        var top = 0;
        var candidate = 1;
        while (candidate < scores.Length) {
            if (scores[candidate] > scores[top]) {
                top = candidate;
            }
            candidate++;
        }

        if (expected.Equals(model.GetOutcome(top))) {
            hits++;
        }
        seen++;
    }

    var accuracy = hits / (double) seen;

    Debug.WriteLine("Accuracy on PPA devset: (" + hits + "/" + seen + ") " + accuracy);

    Assert.AreEqual(expectedAccuracy, accuracy, .00001);
}
/// <summary>
/// Scores the model against the events returned by <c>readPpaFile("devset")</c>, prints the
/// resulting accuracy to the console and asserts that it equals the expected value.
/// </summary>
/// <param name="model">The model to evaluate.</param>
/// <param name="expecedAccuracy">
/// The expected accuracy; compared with a tolerance of 0.00001.
/// (The parameter name is kept as-is so existing callers are unaffected.)
/// </param>
internal static void TestModel(IMaxentModel model, double expecedAccuracy) {
    var matched = 0;
    var processed = 0;

    foreach (var ev in readPpaFile("devset")) {
        var probabilities = model.Eval(ev.Context);

        // Index of the highest probability; the first maximum is kept on ties.
        var winner = 0;
        for (var idx = 1; idx < probabilities.Length; ++idx) {
            winner = probabilities[idx] > probabilities[winner] ? idx : winner;
        }

        var predicted = model.GetOutcome(winner);
        matched += ev.Outcome.Equals(predicted) ? 1 : 0;
        processed += 1;
    }

    var accuracy = matched / (double) processed;

    Console.Out.WriteLine("Accuracy on PPA devSet: (" + matched + "/" + processed + ") " + accuracy);

    Assert.AreEqual(expecedAccuracy, accuracy, .00001);
}
/// <summary>
/// Initializes a new instance of the <see cref="POSModel"/> class with an explicit beam size.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="posModel">The maxent model stored as the part-of-speech artifact.</param>
/// <param name="beamSize">The beam size recorded in the manifest.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="posFactory">The part-of-speech tagger factory.</param>
/// <exception cref="System.InvalidOperationException">
/// The <paramref name="posModel"/> is <c>null</c>.
/// </exception>
public POSModel(
    string languageCode,
    IMaxentModel posModel,
    int beamSize,
    Dictionary<string, string> manifestInfoEntries,
    POSTaggerFactory posFactory)
    : base(ComponentName, languageCode, manifestInfoEntries, posFactory) {

    // The exception type is kept for compatibility with existing callers; the message now
    // names the actual parameter instead of a non-existent "maxentPosModel".
    if (posModel == null) {
        throw new InvalidOperationException("The posModel param must not be null!");
    }

    // The beam size is stored culture-independently so the manifest reads the same everywhere.
    Manifest[Parameters.BeamSize] = beamSize.ToString(CultureInfo.InvariantCulture);

    artifactMap[EntryName] = posModel;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenizerModel"/> class.
/// </summary>
/// <param name="tokenizerModel">The maxent model registered as the tokenizer artifact.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="tokenizerFactory">
/// The tokenizer factory; its language code is forwarded to the base constructor.
/// </param>
public TokenizerModel(
    IMaxentModel tokenizerModel,
    Dictionary<string, string> manifestInfoEntries,
    TokenizerFactory tokenizerFactory)
    : base(ComponentName, tokenizerFactory.LanguageCode, manifestInfoEntries, tokenizerFactory) {

    // Register the model under its entry name, then run the artifact map check.
    artifactMap.Add(TokenizerModelEntry, tokenizerModel);

    CheckArtifactMap();
}
/// <summary>
/// Creates a new <see cref="ParserModel"/> that uses the given check model and takes every
/// other component from this instance.
/// </summary>
/// <param name="checkModel">The check model for the new parser model.</param>
/// <returns>The newly created <see cref="ParserModel"/>.</returns>
public ParserModel UpdateCheckModel(IMaxentModel checkModel) {
    var updated = new ParserModel(
        Language,
        BuildModel,
        checkModel,
        AttachModel,
        ParserTaggerModel,
        ParserChunkerModel,
        HeadRules,
        ParserType);

    return updated;
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderModel"/> class using the
/// default beam size, a new <see cref="BioCodec"/> and a new <see cref="TokenNameFinderFactory"/>.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="nameFinderModel">The name finder model.</param>
/// <param name="generatorDescriptor">The generator descriptor.</param>
/// <param name="resources">The resources.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public TokenNameFinderModel(
    string languageCode,
    IMaxentModel nameFinderModel,
    byte[] generatorDescriptor,
    Dictionary<string, object> resources,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        nameFinderModel,
        NameFinderME.DefaultBeamSize,
        generatorDescriptor,
        resources,
        manifestInfoEntries,
        new BioCodec(),
        new TokenNameFinderFactory()) {
    // All work is done by the chained constructor.
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderModel"/> class without a
/// generator descriptor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="nameFinderModel">The name finder model.</param>
/// <param name="resources">The resources.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public TokenNameFinderModel(
    string languageCode,
    IMaxentModel nameFinderModel,
    Dictionary<string, object> resources,
    Dictionary<string, string> manifestInfoEntries)
    : this(languageCode, nameFinderModel, null, resources, manifestInfoEntries) {
    // The generator descriptor is passed as null; the chained constructor does the rest.
}
/// <summary>
/// Determines whether the outcomes of the given model are compatible with the sequence codec
/// created by the factory.
/// </summary>
/// <param name="model">The model whose outcomes are checked.</param>
/// <returns>
/// The result of the codec's <c>AreOutcomesCompatible</c> check on the model's outcomes.
/// </returns>
private bool IsModelValid(IMaxentModel model) {
    // Query the outcome count once so the array size and the loop bound cannot diverge.
    var count = model.GetNumOutcomes();
    var outcomes = new string[count];

    for (var i = 0; i < count; i++) {
        outcomes[i] = model.GetOutcome(i);
    }

    return Factory.CreateSequenceCodec().AreOutcomesCompatible(outcomes);
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderModel" /> class.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="nameFinderModel">The name finder model.</param>
/// <param name="beamSize">Size of the beam; recorded in the manifest.</param>
/// <param name="generatorDescriptor">The generator descriptor.</param>
/// <param name="resources">The resources.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="sequenceCodec">The sequence codec.</param>
/// <param name="factory">The tool factory.</param>
/// <exception cref="System.InvalidOperationException">Model not compatible with name finder!</exception>
public TokenNameFinderModel(
    string languageCode,
    IMaxentModel nameFinderModel,
    int beamSize,
    byte[] generatorDescriptor,
    Dictionary<string, object> resources,
    Dictionary<string, string> manifestInfoEntries,
    ISequenceCodec<string> sequenceCodec,
    TokenNameFinderFactory factory)
    : base(ComponentName, languageCode, manifestInfoEntries, factory) {

    // The beam size is written with the invariant culture.
    Manifest[Parameters.BeamSize] = beamSize.ToString(CultureInfo.InvariantCulture);

    Init(nameFinderModel, generatorDescriptor, resources, sequenceCodec);

    // Validation runs after initialization, exactly as before.
    var compatible = IsModelValid(nameFinderModel);
    if (!compatible) {
        throw new InvalidOperationException("Model not compatible with name finder!");
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="POSModel"/> class using
/// <c>POSTaggerME.DefaultBeamSize</c> as the beam size.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="posModel">The part-of-speech maxent model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="posFactory">The part-of-speech tagger factory.</param>
public POSModel(
    string languageCode,
    IMaxentModel posModel,
    Dictionary<string, string> manifestInfoEntries,
    POSTaggerFactory posFactory)
    : this(
        languageCode,
        posModel,
        POSTaggerME.DefaultBeamSize,
        manifestInfoEntries,
        posFactory) {
    // All work is done by the chained constructor.
}
/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> class from build, attach and check models.
/// </summary>
/// <param name="buildModel">The build model; used to resolve the <c>DONE</c> outcome index.</param>
/// <param name="attachModel">The attach model; used to resolve the sister/daughter attach indexes.</param>
/// <param name="checkModel">The check model; used to resolve the <c>COMPLETE</c> outcome index.</param>
/// <param name="tagger">The part-of-speech tagger forwarded to the base constructor.</param>
/// <param name="chunker">The chunker forwarded to the base constructor.</param>
/// <param name="headRules">The head rules forwarded to the base constructor.</param>
/// <param name="beamSize">The beam size forwarded to the base constructor.</param>
/// <param name="advancePercentage">The advance percentage forwarded to the base constructor.</param>
private Parser(
    IMaxentModel buildModel,
    IMaxentModel attachModel,
    IMaxentModel checkModel,
    IPOSTagger tagger,
    IChunker chunker,
    AbstractHeadRules headRules,
    int beamSize,
    double advancePercentage)
    : base(tagger, chunker, headRules, beamSize, advancePercentage) {

    // Models.
    this.buildModel = buildModel;
    this.attachModel = attachModel;
    this.checkModel = checkModel;

    // Context generators; the attach and check generators receive the punctuation set.
    buildContextGenerator = new BuildContextGenerator();
    attachContextGenerator = new AttachContextGenerator(punctSet);
    checkContextGenerator = new CheckContextGenerator(punctSet);

    // Probability buffers, one slot per outcome of the corresponding model.
    bProbs = new double[buildModel.GetNumOutcomes()];
    aProbs = new double[attachModel.GetNumOutcomes()];
    cProbs = new double[checkModel.GetNumOutcomes()];

    // Outcome indexes looked up once.
    doneIndex = buildModel.GetIndex(DONE);
    sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
    daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
    // The NON_ATTACH index lookup is disabled:
    // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);

    attachments = new[] {
        daughterAttachIndex,
        sisterAttachIndex
    };

    completeIndex = checkModel.GetIndex(COMPLETE);
}
/// <summary>
/// Initializes a new instance of the <see cref="SentenceModel"/> class.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="sentModel">The maxent model registered as the sentence detector artifact.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="sdFactory">The sentence detector factory.</param>
public SentenceModel(
    string languageCode,
    IMaxentModel sentModel,
    Dictionary<string, string> manifestInfoEntries,
    SentenceDetectorFactory sdFactory)
    : base(ComponentName, languageCode, manifestInfoEntries, sdFactory) {

    // Register the model under its entry name, then run the artifact map check.
    artifactMap.Add(EntryName, sentModel);

    CheckArtifactMap();
}
/// <summary>
/// Sets the model for the given domain.
/// </summary>
/// <param name="domain">The <see cref="MaxEnt.IModelDomain"/> object which keys to the model.</param>
/// <param name="model">The <see cref="Model.IMaxentModel"/> trained for the domain.</param>
/// <remarks>
/// The entry is added while holding the write lock. The lock is always released, even when
/// adding the entry throws; the exception then propagates to the caller.
/// </remarks>
public void SetModelForDomain(IModelDomain domain, IMaxentModel model) {
    _lock.EnterWriteLock();
    try {
        _map.Add(domain, model);
    } finally {
        // Release in finally: a failing Add must not leave the write lock held forever.
        _lock.ExitWriteLock();
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="DocumentCategorizerModel"/> with the default parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="doccatModel">The maxent model registered as the document categorizer artifact.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="factory">The factory.</param>
public DocumentCategorizerModel(
    string languageCode,
    IMaxentModel doccatModel,
    Dictionary<string, string> manifestInfoEntries,
    DocumentCategorizerFactory factory)
    : base(ComponentName, languageCode, manifestInfoEntries, factory) {

    // Register the model under its entry name, then run the artifact map check.
    artifactMap.Add(DoccatEntry, doccatModel);

    CheckArtifactMap();
}
/// <summary>
/// Builds a copy of this parser model in which only the check model is replaced.
/// </summary>
/// <param name="checkModel">The check model to use in the new parser model.</param>
/// <returns>
/// A new <see cref="ParserModel"/> created from this instance's language, build model, attach
/// model, tagger model, chunker model, head rules and parser type, plus the given check model.
/// </returns>
public ParserModel UpdateCheckModel(IMaxentModel checkModel) {
    var replacement = new ParserModel(
        Language,
        BuildModel,
        checkModel,
        AttachModel,
        ParserTaggerModel,
        ParserChunkerModel,
        HeadRules,
        ParserType);

    return replacement;
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and
/// head rules, with <c>ParserType.Chunking</c> as the model type and no attach model.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        buildModel,
        checkModel,
        null, // no attach model
        parserTagger,
        chunkerTagger,
        headRules,
        ParserType.Chunking,
        manifestInfoEntries) {
    // All work is done by the chained constructor.
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and
/// head rules, without manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType)
    : this(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        parserTagger,
        chunkerTagger,
        headRules,
        modelType,
        null) {
    // The manifest entries are passed as null; the chained constructor does the rest.
}