示例#1
0
        /// <summary>
        /// Trains a chunker model from the supplied samples, using the trainer selected by the training parameters.
        /// </summary>
        /// <param name="languageCode">The language code passed to the resulting model.</param>
        /// <param name="samples">The stream of chunk samples to train on.</param>
        /// <param name="parameters">The training parameters; they select the trainer type and supply the beam size.</param>
        /// <param name="factory">The chunker factory that provides the context generator.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory, Monitor monitor)
        {
            var selectedTrainer = TrainerFactory.GetTrainerType(parameters);
            if (!selectedTrainer.HasValue)
            {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            // Filled in by the trainer factory and handed to the resulting model.
            var manifest = new Dictionary<string, string>();

            // Exactly one of these is assigned, depending on the selected trainer type.
            IMaxentModel eventModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (selectedTrainer == TrainerType.SequenceTrainer)
            {
                var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                var sequenceStream = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

                sequenceModel = sequenceTrainer.Train(sequenceStream);
            }
            else if (selectedTrainer == TrainerType.EventModelTrainer)
            {
                var eventStream = new ChunkerEventStream(samples, factory.GetContextGenerator());
                var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);

                eventModel = eventTrainer.Train(eventStream);
            }
            else
            {
                throw new NotSupportedException("Trainer type is not supported.");
            }

            // The beam size is read for both paths, but only the event-model constructor receives it.
            var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            if (eventModel != null)
            {
                return new ChunkerModel(languageCode, eventModel, beamSize, manifest, factory);
            }

            return new ChunkerModel(languageCode, sequenceModel, manifest, factory);
        }
示例#2
0
        /// <summary>
        /// Builds a <see cref="ChunkerModel"/> by training on the given chunk samples with the trainer
        /// type that the training parameters specify.
        /// </summary>
        /// <param name="languageCode">The language code handed to the created model.</param>
        /// <param name="samples">The chunk samples used as training data.</param>
        /// <param name="parameters">The training parameters (trainer type, beam size and trainer settings).</param>
        /// <param name="factory">The chunker factory from which the context generator is obtained.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory, Monitor monitor) {

            var kind = TrainerFactory.GetTrainerType(parameters);
            if (!kind.HasValue) {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            // Manifest entries are populated by the trainer factory and passed on to the model.
            var manifestEntries = new Dictionary<string, string>();

            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (kind) {
                case TrainerType.SequenceTrainer:
                    // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                    // The trainer is obtained first, then the sample stream is built, then training runs.
                    sequenceModel = TrainerFactory
                        .GetSequenceModelTrainer(parameters, manifestEntries, monitor)
                        .Train(new ChunkSampleSequenceStream(samples, factory.GetContextGenerator()));
                    break;
                case TrainerType.EventModelTrainer:
                    // The event stream is created before the trainer is requested from the factory.
                    var events = new ChunkerEventStream(samples, factory.GetContextGenerator());

                    maxentModel = TrainerFactory
                        .GetEventTrainer(parameters, manifestEntries, monitor)
                        .Train(events);
                    break;
                default:
                    throw new NotSupportedException("Trainer type is not supported.");
            }

            // Read regardless of trainer type; only the maxent-based constructor takes it.
            var beam = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            return maxentModel == null
                ? new ChunkerModel(languageCode, sequenceModel, manifestEntries, factory)
                : new ChunkerModel(languageCode, maxentModel, beam, manifestEntries, factory);
        }