예제 #1
0
        /// <summary>
        /// Trains a <see cref="TokenNameFinderModel"/> from the supplied name samples.
        /// </summary>
        /// <remarks>
        /// The trainer type resolved from <paramref name="parameters"/> decides which training path runs:
        /// an event trainer fed by a <c>NameFinderEventStream</c>, or an event-model sequence trainer /
        /// sequence trainer fed by a <c>NameSampleSequenceStream</c>. Only the sequence trainer produces a
        /// sequence classification model; the other two produce a maxent model that is packaged together
        /// with the beam size.
        /// </remarks>
        /// <param name="languageCode">The language of the training data; stored in the resulting model.</param>
        /// <param name="type">
        /// Overrides the type parameter in the provided samples. This value can be <c>null</c>.
        /// Within this method it is only forwarded to the event stream of the event trainer path.
        /// </param>
        /// <param name="samples">The stream of training samples.</param>
        /// <param name="parameters">The machine learning training parameters.</param>
        /// <param name="factory">
        /// The name finder factory that supplies the context generator, the sequence codec,
        /// the feature generator and the resources.
        /// </param>
        /// <param name="monitor">
        /// An evaluation monitor handed to the trainer; it can be used to listen to messages during training
        /// or to cancel the training operation. This argument can be <c>null</c>.
        /// </param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the resolved trainer type is none of the three supported kinds.
        /// </exception>
        public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream <NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor)
        {
            var beamWidth       = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifest        = new Dictionary<string, string>();
            var selectedTrainer = TrainerFactory.GetTrainerType(parameters);

            // At most one of these is assigned, depending on the training path taken below.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (selectedTrainer == TrainerType.EventModelTrainer)
            {
                var contextGenerator = factory.CreateContextGenerator();
                var codec            = factory.CreateSequenceCodec();
                var events           = new NameFinderEventStream(samples, type, contextGenerator, codec);

                var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);
                maxentModel = eventTrainer.Train(events);
            }
            else if (selectedTrainer == TrainerType.EventModelSequenceTrainer)
            {
                var contextGenerator = factory.CreateContextGenerator();
                var sequences        = new NameSampleSequenceStream(samples, contextGenerator);

                var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);
                maxentModel = eventSequenceTrainer.Train(sequences);
            }
            else if (selectedTrainer == TrainerType.SequenceTrainer)
            {
                var contextGenerator = factory.CreateContextGenerator();
                var sequences        = new NameSampleSequenceStream(samples, contextGenerator);

                var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);
                sequenceModel = sequenceTrainer.Train(sequences);
            }
            else
            {
                throw new InvalidOperationException("Unexpected trainer type!");
            }

            // No sequence model was produced: package the maxent model (and the beam size) instead.
            if (sequenceModel == null)
            {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentModel,
                    beamWidth,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifest,
                    factory.SequenceCodec);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceModel,
                factory.FeatureGenerator,
                factory.Resources,
                manifest,
                factory.SequenceCodec);
        }
예제 #2
0
        /// <summary>
        /// Builds the event array for the given sequence, using the outcomes that the supplied model
        /// predicts for the sequence's source sentence.
        /// </summary>
        /// <remarks>
        /// A temporary <c>NameFinderME</c> is created around <paramref name="model"/> on every call.
        /// The names it finds are encoded by the sequence codec and then turned into events
        /// together with the sentence tokens.
        /// </remarks>
        /// <param name="sequence">The sequence to be evaluated; its source must be a <c>NameSample</c>.</param>
        /// <param name="model">The model used to predict the outcomes.</param>
        /// <returns>The generated events as an array.</returns>
        public Event[] UpdateContext(Sequence sequence, AbstractModel model)
        {
            // Wrap the raw model in a throw-away name finder model so it can drive a NameFinderME.
            var emptyMap  = new Dictionary<string, object>();
            var tempModel = new TokenNameFinderModel("x-unspecified", model, emptyMap, null);
            var finder    = new NameFinderME(tempModel);

            var tokens = sequence.GetSource<NameSample>().Sentence;

            // Predict the names, then encode them into one outcome per token.
            var foundNames = finder.Find(tokens);
            var outcomes   = seqCodec.Encode(foundNames, tokens.Length);

            var events = NameFinderEventStream.GenerateEvents(tokens, outcomes, pcg);

            return events.ToArray();
        }