Example #1
0
 /// <summary>
 /// Trains a chunker model from the given samples without a training monitor.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <param name="factory">The chunker factory.</param>
 /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
 /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
 /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
 public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory)
 {
     // Forward to the five-argument overload, passing null as the monitor.
     var trainedModel = Train(languageCode, samples, parameters, factory, null);

     return trainedModel;
 }
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerCrossValidator"/> class.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="parameters">The training parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="listeners">The evaluation monitors to keep as listeners.</param>
        public ChunkerCrossValidator(string languageCode, TrainingParameters parameters, ChunkerFactory factory, params IEvaluationMonitor<ChunkSample>[] listeners)
        {
            chunkerFactory = factory;

            // Start with a fresh F-measure instance.
            FMeasure = new FMeasure<Span>();

            // Keep the remaining constructor arguments as they were given.
            this.listeners = listeners;
            this.parameters = parameters;
            this.languageCode = languageCode;
        }
Example #3
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerCrossValidator"/> class.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="parameters">The training parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="listeners">The evaluation monitors to keep as listeners.</param>
        public ChunkerCrossValidator(string languageCode, TrainingParameters parameters, ChunkerFactory factory, params IEvaluationMonitor<ChunkSample>[] listeners) {
            chunkerFactory = factory;

            // Start with a fresh F-measure instance.
            FMeasure = new FMeasure<Span>();

            // Keep the remaining constructor arguments as they were given.
            this.listeners = listeners;
            this.parameters = parameters;
            this.languageCode = languageCode;
        }
Example #4
0
        /// <summary>
        /// Trains a chunker model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be <c>null</c>.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(
            string languageCode,
            IObjectStream<ChunkSample> samples,
            TrainingParameters parameters,
            ChunkerFactory factory,
            Monitor monitor)
        {
            var selectedTrainer = TrainerFactory.GetTrainerType(parameters);
            if (selectedTrainer == null)
            {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            var manifestEntries = new Dictionary<string, string>();

            // At most one of these two is assigned, depending on the trainer type.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (selectedTrainer.Value)
            {
                case TrainerType.SequenceTrainer:
                {
                    var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestEntries, monitor);

                    // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                    var sequenceStream = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

                    sequenceModel = sequenceTrainer.Train(sequenceStream);
                    break;
                }

                case TrainerType.EventModelTrainer:
                {
                    var eventStream = new ChunkerEventStream(samples, factory.GetContextGenerator());
                    var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestEntries, monitor);

                    maxentModel = eventTrainer.Train(eventStream);
                    break;
                }

                default:
                    throw new NotSupportedException("Trainer type is not supported.");
            }

            // The beam size is read for both trainer types, but only passed on with a maxent model.
            var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            if (maxentModel != null)
            {
                return new ChunkerModel(languageCode, maxentModel, beamSize, manifestEntries, factory);
            }

            return new ChunkerModel(languageCode, sequenceModel, manifestEntries, factory);
        }
 /// <summary>
 /// Trains a chunker model for the <c>"en"</c> language code with the given factory, using the
 /// sample stream from <c>ChunkerMETest.CreateSampleStream</c> and the default training parameters.
 /// </summary>
 /// <param name="factory">The chunker factory passed to the trainer.</param>
 /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
 private static ChunkerModel TrainModel(ChunkerFactory factory) {
     var sampleStream = ChunkerMETest.CreateSampleStream();
     var defaultParameters = TrainingParameters.DefaultParameters();

     return ChunkerME.Train("en", sampleStream, defaultParameters, factory);
 }
Example #6
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerModel"/> class with a specified <paramref name="beamSize"/> value.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="chunkerModel">The chunker model stored under the chunker entry of the artifact map.</param>
        /// <param name="beamSize">Size of the beam, written to the manifest.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="factory">The chunker factory.</param>
        public ChunkerModel(
            string languageCode,
            IMaxentModel chunkerModel,
            int beamSize,
            Dictionary<string, string> manifestInfoEntries,
            ChunkerFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory)
        {
            artifactMap[ChunkerEntry] = chunkerModel;

            // Format with the invariant culture so the stored text does not depend on the current culture.
            var beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);
            Manifest[Parameters.BeamSize] = beamSizeText;

            CheckArtifactMap();
        }
Example #7
0
 /// <summary>
 /// Initializes a new instance of the <see cref="ChunkerModel"/> class using <c>ChunkerME.DefaultBeamSize</c> as the beam size.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="chunkerModel">The chunker model.</param>
 /// <param name="manifestInfoEntries">The manifest information entries.</param>
 /// <param name="factory">The chunker factory.</param>
 public ChunkerModel(
     string languageCode,
     IMaxentModel chunkerModel,
     Dictionary<string, string> manifestInfoEntries,
     ChunkerFactory factory)
     : this(languageCode, chunkerModel, ChunkerME.DefaultBeamSize, manifestInfoEntries, factory)
 {
     // All work is done by the beam-size constructor.
 }
Example #8
0
 /// <summary>
 /// Initializes a new instance of the <see cref="ChunkerModel"/> class using <c>ChunkerME.DefaultBeamSize</c>
 /// as the beam size and <c>null</c> as the manifest information entries.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="chunkerModel">The chunker model.</param>
 /// <param name="factory">The chunker factory.</param>
 public ChunkerModel(
     string languageCode,
     IMaxentModel chunkerModel,
     ChunkerFactory factory)
     : this(languageCode, chunkerModel, ChunkerME.DefaultBeamSize, null, factory)
 {
     // All work is done by the beam-size constructor.
 }
Example #9
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerModel"/> class using a
        /// <see cref="ISequenceClassificationModel{T}"/> of <c>string</c> as the chunker model.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="chunkerModel">The sequence classification model added under the chunker entry of the artifact map.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="factory">The chunker factory.</param>
        public ChunkerModel(
            string languageCode,
            ISequenceClassificationModel<string> chunkerModel,
            Dictionary<string, string> manifestInfoEntries,
            ChunkerFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory)
        {
            // Register the model first, then check the artifact map.
            artifactMap.Add(ChunkerEntry, chunkerModel);
            CheckArtifactMap();
        }
Example #10
0
        /// <summary>
        /// Trains a chunker model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be <c>null</c>.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(
            string languageCode,
            IObjectStream<ChunkSample> samples,
            TrainingParameters parameters,
            ChunkerFactory factory,
            Monitor monitor) {

            var selectedTrainer = TrainerFactory.GetTrainerType(parameters);
            if (selectedTrainer == null) {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            var manifestEntries = new Dictionary<string, string>();

            // At most one of these two is assigned, depending on the trainer type.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (selectedTrainer.Value) {
                case TrainerType.SequenceTrainer: {
                    var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestEntries, monitor);

                    // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                    var sequenceStream = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

                    sequenceModel = sequenceTrainer.Train(sequenceStream);
                    break;
                }
                case TrainerType.EventModelTrainer: {
                    var eventStream = new ChunkerEventStream(samples, factory.GetContextGenerator());
                    var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestEntries, monitor);

                    maxentModel = eventTrainer.Train(eventStream);
                    break;
                }
                default:
                    throw new NotSupportedException("Trainer type is not supported.");
            }

            // The beam size is read for both trainer types, but only passed on with a maxent model.
            var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            if (maxentModel != null) {
                return new ChunkerModel(languageCode, maxentModel, beamSize, manifestEntries, factory);
            }

            return new ChunkerModel(languageCode, sequenceModel, manifestEntries, factory);
        }
Example #11
0
        /// <summary>
        /// Trains a chunker model from the given samples without a training monitor.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory) {
            // Forward to the five-argument overload, passing null as the monitor.
            var trainedModel = Train(languageCode, samples, parameters, factory, null);

            return trainedModel;
        }