Esempio n. 1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerME"/> with the specified <see cref="ChunkerModel"/>.
        /// </summary>
        /// <param name="model">The chunker model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public ChunkerME(ChunkerModel model)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // on the first member access below.
            if (model == null)
            {
                throw new System.ArgumentNullException(nameof(model));
            }

            contextGenerator  = model.Factory.GetContextGenerator();
            sequenceValidator = model.Factory.GetSequenceValidator();

            // Use the sequence model exposed by the chunker model when there is one; otherwise
            // build a beam search over the maxent model using the model's own beam size.
            this.model = model.ChunkerSequenceModel ?? new BeamSearch(model.BeamSize, model.MaxentModel);
        }
Esempio n. 2
0
        /// <summary>
        /// Trains a name finder model from the given samples.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">Overrides the type parameter in the provided samples. This value can be <c>null</c>.</param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer type is not one of the types handled by this method.</exception>
        public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor)
        {
            var beam     = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifest = new Dictionary<string, string>();
            var kind     = TrainerFactory.GetTrainerType(parameters);

            // Exactly one of these is assigned, depending on the trainer type.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (kind)
            {
                case TrainerType.EventModelTrainer:
                {
                    // Event based training: the stream is built from the samples, the type override
                    // and the factory's context generator and sequence codec.
                    var events  = new NameFinderEventStream(samples, type, factory.CreateContextGenerator(), factory.CreateSequenceCodec());
                    var trainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);

                    maxentModel = trainer.Train(events);
                    break;
                }

                case TrainerType.EventModelSequenceTrainer:
                {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
                    var trainer   = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);

                    maxentModel = trainer.Train(sequences);
                    break;
                }

                case TrainerType.SequenceTrainer:
                {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
                    var trainer   = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                    sequenceModel = trainer.Train(sequences);
                    break;
                }

                default:
                    throw new InvalidOperationException("Unexpected trainer type!");
            }

            // Without a sequence model, package the maxent model together with the beam size.
            if (sequenceModel == null)
            {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentModel,
                    beam,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifest,
                    factory.SequenceCodec);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceModel,
                factory.FeatureGenerator,
                factory.Resources,
                manifest,
                factory.SequenceCodec);
        }
Esempio n. 3
0
        /// <summary>
        /// Initializes the current instance with the specified model, beam size, sequence validator and context generator.
        /// </summary>
        /// <param name="model">The model for this chunker.</param>
        /// <param name="beamSize">The size of the beam that should be used when decoding sequences. It is only used when the model does not expose a sequence model.</param>
        /// <param name="sequenceValidator">The <see cref="ISequenceValidator{String}"/> that determines whether an outcome is valid for the preceding sequence. This can be used to implement constraints on what sequences are valid.</param>
        /// <param name="contextGenerator">The context generator.</param>
        internal ChunkerME(ChunkerModel model, int beamSize, ISequenceValidator<string> sequenceValidator, IChunkerContextGenerator contextGenerator) {
            // This constructor is marked as deprecated in OpenNLP, but the Parser still requires it.
            // Changing the context generator in the factory would be an alternative, but it is not
            // ideal in this situation.

            this.contextGenerator = contextGenerator;
            this.sequenceValidator = sequenceValidator;

            // Take the sequence model from the chunker model; when there is none, fall back to a
            // beam search over the maxent model with the requested beam size.
            this.model = model.ChunkerSequenceModel;
            if (this.model == null) {
                this.model = new BeamSearch(beamSize, model.MaxentModel);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Initializes the current instance with the specified model, beam size, sequence validator and context generator.
        /// </summary>
        /// <param name="model">The model for this chunker.</param>
        /// <param name="beamSize">The size of the beam that should be used when decoding sequences. It is only used when the model does not expose a sequence model.</param>
        /// <param name="sequenceValidator">The <see cref="ISequenceValidator{String}"/> that determines whether an outcome is valid for the preceding sequence. This can be used to implement constraints on what sequences are valid.</param>
        /// <param name="contextGenerator">The context generator.</param>
        internal ChunkerME(ChunkerModel model, int beamSize, ISequenceValidator<string> sequenceValidator, IChunkerContextGenerator contextGenerator)
        {
            // This constructor is marked as deprecated in OpenNLP, but the Parser still requires it.
            // Changing the context generator in the factory would be an alternative, but it is not
            // ideal in this situation.

            this.contextGenerator  = contextGenerator;
            this.sequenceValidator = sequenceValidator;

            // Take the sequence model from the chunker model; when there is none, fall back to a
            // beam search over the maxent model with the requested beam size.
            this.model = model.ChunkerSequenceModel;

            if (this.model == null)
            {
                this.model = new BeamSearch(beamSize, model.MaxentModel);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Initializes a new instance of the <see cref="LemmatizerME"/> using the given <see cref="LemmatizerModel"/>.
        /// </summary>
        /// <param name="model">The lemmatizer model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public LemmatizerME(LemmatizerModel model)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            // Both collaborators come from the factory attached to the model.
            this.contextGenerator  = model.Factory.GetContextGenerator();
            this.sequenceValidator = model.Factory.GetSequenceValidator();

            // Knuppe: the original implementation has a condition that recreates the beam search object,
            // but that condition cannot occur because of the getLemmatizerSequenceModel() method logic,
            // so the sequence model is used directly.
            this.model = model.LemmatizerSequenceModel;
        }
Esempio n. 6
0
        /// <summary>
        /// Trains a lemmatizer model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The lemma samples used for training.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The lemmatizer factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the training
        /// operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="LemmatizerModel" /> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static LemmatizerModel Train(string languageCode, IObjectStream<LemmaSample> samples, TrainingParameters parameters, LemmatizerFactory factory, Monitor monitor)
        {
            var manifest         = new Dictionary<string, string>();
            var beam             = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var contextGenerator = factory.GetContextGenerator();

            var kind = TrainerFactory.GetTrainerType(parameters);
            if (!kind.HasValue)
            {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            // Exactly one of these is assigned, depending on the trainer type.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (kind.Value)
            {
                case TrainerType.EventModelTrainer:
                {
                    var events  = new LemmaSampleEventStream(samples, contextGenerator);
                    var trainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);

                    maxentModel = trainer.Train(events);
                    break;
                }

                case TrainerType.EventModelSequenceTrainer:
                {
                    var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);
                    var trainer   = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);

                    maxentModel = trainer.Train(sequences);
                    break;
                }

                case TrainerType.SequenceTrainer:
                {
                    var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);
                    var trainer   = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                    sequenceModel = trainer.Train(sequences);
                    break;
                }

                default:
                    throw new NotSupportedException("Trainer type is not supported.");
            }

            // A maxent model is packaged with the beam size; a sequence model is packaged as-is.
            if (maxentModel != null)
            {
                return new LemmatizerModel(languageCode, maxentModel, beam, manifest, factory);
            }

            return new LemmatizerModel(languageCode, sequenceModel, manifest, factory);
        }
Esempio n. 7
0
        /// <summary>
        /// Trains a Part of Speech model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The POS samples used for training.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The POS tagger factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="POSModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static POSModel Train(string languageCode, IObjectStream<POSSample> samples, TrainingParameters parameters, POSTaggerFactory factory, Monitor monitor)
        {
            var generator = factory.GetPOSContextGenerator();
            var manifest  = new Dictionary<string, string>();
            var kind      = TrainerFactory.GetTrainerType(parameters);

            // Exactly one of these is assigned, depending on the trainer type.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (kind == TrainerType.EventModelTrainer)
            {
                var events  = new POSSampleEventStream(samples, generator);
                var trainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);

                maxentModel = trainer.Train(events);
            }
            else if (kind == TrainerType.EventModelSequenceTrainer)
            {
                var sequences = new POSSampleSequenceStream(samples, generator);
                var trainer   = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);

                maxentModel = trainer.Train(sequences);
            }
            else if (kind == TrainerType.SequenceTrainer)
            {
                var trainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                // TODO: This will probably cause an issue, since the feature generator uses the outcomes array.
                var sequences = new POSSampleSequenceStream(samples, generator);

                sequenceModel = trainer.Train(sequences);
            }
            else
            {
                throw new NotSupportedException("Trainer type is not supported.");
            }

            return maxentModel != null
                ? new POSModel(languageCode, maxentModel, manifest, factory)
                : new POSModel(languageCode, sequenceModel, manifest, factory);
        }
Esempio n. 8
0
        /// <summary>
        /// Trains a chunker model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The chunk samples used for training.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory, Monitor monitor)
        {
            var kind = TrainerFactory.GetTrainerType(parameters);
            if (!kind.HasValue)
            {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            var manifest = new Dictionary<string, string>();

            // Exactly one of these is assigned, depending on the trainer type.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            // Only the event trainer and the sequence trainer are handled; anything else is rejected.
            switch (kind.Value)
            {
                case TrainerType.EventModelTrainer:
                {
                    var events  = new ChunkerEventStream(samples, factory.GetContextGenerator());
                    var trainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);

                    maxentModel = trainer.Train(events);
                    break;
                }

                case TrainerType.SequenceTrainer:
                {
                    var trainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                    // TODO: This will probably cause an issue, since the feature generator uses the outcomes array.
                    var sequences = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

                    sequenceModel = trainer.Train(sequences);
                    break;
                }

                default:
                    throw new NotSupportedException("Trainer type is not supported.");
            }

            var beam = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            // A maxent model is packaged with the beam size; a sequence model is packaged as-is.
            if (maxentModel != null)
            {
                return new ChunkerModel(languageCode, maxentModel, beam, manifest, factory);
            }

            return new ChunkerModel(languageCode, sequenceModel, manifest, factory);
        }
Esempio n. 9
0
        /// <summary>
        /// Initializes a new instance of the <see cref="NameFinderME"/> using the given <see cref="TokenNameFinderModel"/>.
        /// </summary>
        /// <param name="model">The name finder model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public NameFinderME(TokenNameFinderModel model)
        {
            if (model == null)
            {
                throw new ArgumentNullException(nameof(model));
            }

            // The sequence validator is derived from the codec created by the model's factory.
            this.sequenceCodec     = model.Factory.CreateSequenceCodec();
            this.sequenceValidator = this.sequenceCodec.CreateSequenceValidator();

            this.model = model.NameFinderSequenceModel;

            this.contextGenerator = model.Factory.CreateContextGenerator();

            // TODO: We should deprecate this. And come up with a better solution!
            // The additional-context generator is wrapped in a window feature generator (8, 8)
            // and registered on the context generator.
            this.additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();

            var windowGenerator = new WindowFeatureGenerator(this.additionalContextFeatureGenerator, 8, 8);
            this.contextGenerator.AddFeatureGenerator(windowGenerator);
        }
Esempio n. 10
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model.
        /// The beam size is read from the model manifest, with <c>DefaultBeamSize</c> passed as the fallback value.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public POSTaggerME(POSModel model)
        {
            if (model == null)
            {
                // nameof keeps the parameter name in sync with the signature on rename.
                throw new ArgumentNullException(nameof(model));
            }

            var beamSize = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

            size = beamSize;

            modelPackage = model;

            TagDictionary = model.Factory.TagDictionary;

            // NOTE(review): the beam size is what gets passed to GetPOSContextGenerator here - confirm this is intended.
            ContextGenerator  = model.Factory.GetPOSContextGenerator(beamSize);
            SequenceValidator = model.Factory.GetSequenceValidator();

            // Use the sequence model exposed by the POS model when there is one; otherwise
            // build a beam search over the maxent model.
            this.model = model.PosSequenceModel ?? new ML.BeamSearch <string>(beamSize, model.MaxentModel, 0);
        }
Esempio n. 11
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POSTaggerME" /> with the provided
        /// model and provided beam size.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="beamSize">Size of the beam. In this constructor it is only stored in <c>size</c>.</param>
        /// <param name="cacheSize">Size of the cache, passed to the factory when the context generator is requested.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        /// <exception cref="System.InvalidOperationException">The model does not provide a sequence model.</exception>
        public POSTaggerME(POSModel model, int beamSize, int cacheSize)
        {
            if (model == null)
            {
                // nameof keeps the parameter name in sync with the signature on rename.
                throw new ArgumentNullException(nameof(model));
            }

            size         = beamSize;
            modelPackage = model;

            TagDictionary     = modelPackage.Factory.TagDictionary;
            ContextGenerator  = model.Factory.GetPOSContextGenerator(cacheSize);
            SequenceValidator = modelPackage.Factory.GetSequenceValidator();

            // Unlike the single-argument constructor, there is no beam search fallback here:
            // a missing sequence model is reported as an error.
            this.model = model.PosSequenceModel;

            if (this.model == null)
            {
                throw new InvalidOperationException("Unable to retrieve the model.");
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model.
        /// The beam size is read from the model manifest, with <c>DefaultBeamSize</c> passed as the fallback value.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public POSTaggerME(POSModel model) {

            if (model == null) {
                // nameof keeps the parameter name in sync with the signature on rename.
                throw new ArgumentNullException(nameof(model));
            }

            var beamSize = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

            size = beamSize;

            modelPackage = model;

            TagDictionary = model.Factory.TagDictionary;

            // NOTE(review): the beam size is what gets passed to GetPOSContextGenerator here - confirm this is intended.
            ContextGenerator = model.Factory.GetPOSContextGenerator(beamSize);
            SequenceValidator = model.Factory.GetSequenceValidator();

            // Use the sequence model exposed by the POS model when there is one; otherwise
            // build a beam search over the maxent model.
            this.model = model.PosSequenceModel ?? new ML.BeamSearch<string>(beamSize, model.MaxentModel, 0);
        }
Esempio n. 13
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided
        /// model and provided beam size.
        /// </summary>
        /// <param name="model">The model.</param>
        /// <param name="beamSize">Size of the beam. In this constructor it is only stored in <c>size</c>.</param>
        /// <param name="cacheSize">Size of the cache, passed to the factory when the context generator is requested.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        /// <exception cref="System.InvalidOperationException">The model does not provide a sequence model.</exception>
        public POSTaggerME(POSModel model, int beamSize, int cacheSize) {
            if (model == null) {
                // nameof keeps the parameter name in sync with the signature on rename.
                throw new ArgumentNullException(nameof(model));
            }

            size = beamSize;
            modelPackage = model;

            TagDictionary = modelPackage.Factory.TagDictionary;
            ContextGenerator = model.Factory.GetPOSContextGenerator(cacheSize);
            SequenceValidator = modelPackage.Factory.GetSequenceValidator();

            // There is no beam search fallback here: a missing sequence model is reported as an error.
            this.model = model.PosSequenceModel;

            if (this.model == null) {
                throw new InvalidOperationException("Unable to retrieve the model.");
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Initializes a new instance of the <see cref="NameFinderME"/> using the given <see cref="TokenNameFinderModel"/>.
        /// </summary>
        /// <param name="model">The name finder model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public NameFinderME(TokenNameFinderModel model) {
            if (model == null) {
                // nameof keeps the parameter name in sync with the signature on rename.
                throw new ArgumentNullException(nameof(model));
            }

            // The sequence validator is derived from the codec created by the model's factory.
            sequenceCodec = model.Factory.CreateSequenceCodec();
            sequenceValidator = sequenceCodec.CreateSequenceValidator();

            this.model = model.NameFinderSequenceModel;

            contextGenerator = model.Factory.CreateContextGenerator();

            // TODO: We should deprecate this. And come up with a better solution!
            additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();
            contextGenerator.AddFeatureGenerator(
                new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
        }
Esempio n. 15
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerME"/> with the specified <see cref="ChunkerModel"/>.
        /// </summary>
        /// <param name="model">The chunker model.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public ChunkerME(ChunkerModel model) {
            // Fail fast with a descriptive exception instead of a NullReferenceException
            // on the first member access below.
            if (model == null) {
                throw new System.ArgumentNullException(nameof(model));
            }

            contextGenerator = model.Factory.GetContextGenerator();
            sequenceValidator = model.Factory.GetSequenceValidator();

            // Use the sequence model exposed by the chunker model when there is one; otherwise
            // build a beam search over the maxent model using the model's own beam size.
            this.model = model.ChunkerSequenceModel ?? new BeamSearch(model.BeamSize, model.MaxentModel);
        }