Example #1
0
 /// <summary>
 /// Trains a name finder model, using <c>DefaultType</c> as the type argument.
 /// </summary>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="samples">The stream of training samples.</param>
 /// <param name="parameters">The machine learning training parameters.</param>
 /// <param name="factory">The name finder factory.</param>
 /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
 public static TokenNameFinderModel Train(
     string languageCode,
     IObjectStream<NameSample> samples,
     TrainingParameters parameters,
     TokenNameFinderFactory factory)
 {
     // Delegate to the overload that takes an explicit type.
     TokenNameFinderModel trainedModel = Train(languageCode, DefaultType, samples, parameters, factory);

     return trainedModel;
 }
Example #2
0
 /// <summary>
 /// Initializes a new instance of the <see cref="TokenNameFinderCrossValidator"/> class
 /// with a newly created <see cref="TokenNameFinderFactory"/>.
 /// </summary>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="type"><c>null</c> or an override type for all types in the training data.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="listeners">The evaluation listeners.</param>
 public TokenNameFinderCrossValidator(
     string languageCode,
     string type,
     TrainingParameters parameters,
     params IEvaluationMonitor<NameSample>[] listeners)
 {
     // Values supplied by the caller.
     this.languageCode = languageCode;
     this.type = type;
     this.parameters = parameters;
     this.listeners = listeners;

     // Objects created by this overload.
     this.factory = new TokenNameFinderFactory();
     this.FMeasure = new FMeasure<Span>();
 }
        /// <summary>
        /// Initializes a new instance of the <see cref="TokenNameFinderCrossValidator"/> class
        /// with a caller-supplied name finder factory.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type"><c>null</c> or an override type for all types in the training data.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory. Must not be <c>null</c>.</param>
        /// <param name="listeners">The evaluation listeners.</param>
        /// <exception cref="ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
        public TokenNameFinderCrossValidator(
            string languageCode,
            string type,
            TrainingParameters parameters,
            TokenNameFinderFactory factory,
            params IEvaluationMonitor<NameSample>[] listeners) {

            // Reject a missing factory before any state is assigned.
            if (factory == null) {
                throw new ArgumentNullException(nameof(factory));
            }

            this.languageCode = languageCode;
            this.type = type;
            this.parameters = parameters;
            this.factory = factory;
            this.listeners = listeners;

            this.FMeasure = new FMeasure<Span>();
        }
Example #4
0
        /// <summary>
        /// Initializes a new instance of the <see cref="TokenNameFinderModel" /> class
        /// from a sequence classification model.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="nameFinderModel">The name finder sequence classification model.</param>
        /// <param name="generatorDescriptor">The generator descriptor.</param>
        /// <param name="resources">The resources.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="seqCodec">The sequence codec used to check the model outcomes.</param>
        /// <param name="factory">The tool factory.</param>
        /// <exception cref="InvalidOperationException">
        /// The sequence codec reports the outcomes of the model as not compatible.
        /// </exception>
        public TokenNameFinderModel(
            string languageCode,
            ISequenceClassificationModel<string> nameFinderModel,
            byte[] generatorDescriptor,
            Dictionary<string, object> resources,
            Dictionary<string, string> manifestInfoEntries,
            ISequenceCodec<string> seqCodec,
            TokenNameFinderFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory) {

            Init(nameFinderModel, generatorDescriptor, resources, seqCodec);

            // The compatibility check runs after Init, as in the original flow.
            var outcomes = nameFinderModel.GetOutcomes();
            if (seqCodec.AreOutcomesCompatible(outcomes)) {
                return;
            }

            throw new InvalidOperationException("Model not compatible with name finder!");
        }
Example #5
0
        /// <summary>
        /// Initializes a new instance of the <see cref="TokenNameFinderModel" /> class
        /// from a sequence classification model.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="nameFinderModel">The name finder sequence classification model.</param>
        /// <param name="generatorDescriptor">The generator descriptor.</param>
        /// <param name="resources">The resources.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="seqCodec">The sequence codec used to check the model outcomes.</param>
        /// <param name="factory">The tool factory.</param>
        /// <exception cref="InvalidOperationException">
        /// The sequence codec reports the outcomes of the model as not compatible.
        /// </exception>
        public TokenNameFinderModel(
            string languageCode,
            ISequenceClassificationModel<string> nameFinderModel,
            byte[] generatorDescriptor,
            Dictionary<string, object> resources,
            Dictionary<string, string> manifestInfoEntries,
            ISequenceCodec<string> seqCodec,
            TokenNameFinderFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory)
        {
            Init(nameFinderModel, generatorDescriptor, resources, seqCodec);

            // Ask the codec whether it can work with the outcomes of this model.
            bool outcomesCompatible = seqCodec.AreOutcomesCompatible(nameFinderModel.GetOutcomes());
            if (outcomesCompatible)
            {
                return;
            }

            throw new InvalidOperationException("Model not compatible with name finder!");
        }
Example #6
0
        /// <summary>
        /// Initializes a new instance of the <see cref="TokenNameFinderModel" /> class
        /// from a maxent model and a beam size.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="nameFinderModel">The name finder maxent model.</param>
        /// <param name="beamSize">The beam size, stored in the manifest as an invariant-culture string.</param>
        /// <param name="generatorDescriptor">The generator descriptor.</param>
        /// <param name="resources">The resources.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="sequenceCodec">The sequence codec.</param>
        /// <param name="factory">The tool factory.</param>
        /// <exception cref="InvalidOperationException">
        /// <c>IsModelValid</c> rejects the given <paramref name="nameFinderModel"/>.
        /// </exception>
        public TokenNameFinderModel(
            string languageCode,
            IMaxentModel nameFinderModel,
            int beamSize,
            byte[] generatorDescriptor,
            Dictionary<string, object> resources,
            Dictionary<string, string> manifestInfoEntries,
            ISequenceCodec<string> sequenceCodec,
            TokenNameFinderFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory)
        {
            // Record the beam size in the manifest, formatted culture-independently.
            string beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);
            Manifest[Parameters.BeamSize] = beamSizeText;

            Init(nameFinderModel, generatorDescriptor, resources, sequenceCodec);

            if (IsModelValid(nameFinderModel))
            {
                return;
            }

            throw new InvalidOperationException("Model not compatible with name finder!");
        }
Example #7
0
        /// <summary>
        /// Trains a name finder model with the trainer selected by the given parameters.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">
        /// Overrides the type parameter in the provided samples. This value can be null.
        /// It is only passed to the event stream built for the event model trainer.
        /// </param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <param name="monitor">
        /// The monitor handed to the trainer obtained from <c>TrainerFactory</c>.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        /// <exception cref="InvalidOperationException">The trainer type is not one of the handled values.</exception>
        public static TokenNameFinderModel Train(
            string languageCode,
            string type,
            IObjectStream<NameSample> samples,
            TrainingParameters parameters,
            TokenNameFinderFactory factory,
            Monitor monitor)
        {
            var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifestInfoEntries = new Dictionary<string, string>();
            var trainerType = TrainerFactory.GetTrainerType(parameters);

            // Each handled trainer type assigns one of these two results.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (trainerType)
            {
                case TrainerType.EventModelTrainer:
                {
                    var contextGenerator = factory.CreateContextGenerator();
                    var sequenceCodec = factory.CreateSequenceCodec();
                    var events = new NameFinderEventStream(samples, type, contextGenerator, sequenceCodec);
                    var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

                    maxentModel = eventTrainer.Train(events);
                    break;
                }

                case TrainerType.EventModelSequenceTrainer:
                {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
                    var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);

                    maxentModel = eventSequenceTrainer.Train(sequences);
                    break;
                }

                case TrainerType.SequenceTrainer:
                {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
                    var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

                    sequenceModel = sequenceTrainer.Train(sequences);
                    break;
                }

                default:
                    throw new InvalidOperationException("Unexpected trainer type!");
            }

            // Without a sequence model, build the model from the maxent model and the beam size.
            if (sequenceModel == null)
            {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentModel,
                    beamSize,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifestInfoEntries,
                    factory.SequenceCodec,
                    factory);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceModel,
                factory.FeatureGenerator,
                factory.Resources,
                manifestInfoEntries,
                factory.SequenceCodec,
                factory);
        }
Example #8
0
        /// <summary>
        /// Trains a name finder model with the trainer selected by the given parameters.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">
        /// Overrides the type parameter in the provided samples. This value can be null.
        /// It is only passed to the event stream built for the event model trainer.
        /// </param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <param name="monitor">
        /// The monitor handed to the trainer obtained from <c>TrainerFactory</c>.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        /// <exception cref="InvalidOperationException">The trainer type is not one of the handled values.</exception>
        public static TokenNameFinderModel Train(
            string languageCode,
            string type,
            IObjectStream<NameSample> samples,
            TrainingParameters parameters,
            TokenNameFinderFactory factory,
            Monitor monitor) {

            var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifestInfoEntries = new Dictionary<string, string>();
            var trainerType = TrainerFactory.GetTrainerType(parameters);

            // Each handled trainer type fills in one of these two results.
            IMaxentModel maxentResult = null;
            ML.Model.ISequenceClassificationModel<string> sequenceResult = null;

            switch (trainerType) {
                case TrainerType.EventModelTrainer: {
                    var events = new NameFinderEventStream(
                        samples,
                        type,
                        factory.CreateContextGenerator(),
                        factory.CreateSequenceCodec());
                    var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);

                    maxentResult = eventTrainer.Train(events);
                    break;
                }
                case TrainerType.EventModelSequenceTrainer: {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
                    var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);

                    maxentResult = eventSequenceTrainer.Train(sequences);
                    break;
                }
                case TrainerType.SequenceTrainer: {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
                    var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

                    sequenceResult = sequenceTrainer.Train(sequences);
                    break;
                }
                default:
                    throw new InvalidOperationException("Unexpected trainer type!");
            }

            // A sequence model takes precedence; otherwise use the maxent model and beam size.
            if (sequenceResult == null) {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentResult,
                    beamSize,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifestInfoEntries,
                    factory.SequenceCodec,
                    factory);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceResult,
                factory.FeatureGenerator,
                factory.Resources,
                manifestInfoEntries,
                factory.SequenceCodec,
                factory);
        }
Example #9
0
        /// <summary>
        /// Trains a name finder model with the given parameters and no monitor.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        public static TokenNameFinderModel Train(
            string languageCode,
            string type,
            IObjectStream<NameSample> samples,
            TrainingParameters parameters,
            TokenNameFinderFactory factory) {

            // Forward to the six-argument overload, passing null as the last argument.
            return Train(
                languageCode,
                type,
                samples,
                parameters,
                factory,
                null);
        }
Example #10
0
 /// <summary>
 /// Trains a name finder model, using <c>DefaultType</c> as the type argument.
 /// </summary>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="samples">The training samples.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="factory">The name finder factory.</param>
 /// <param name="monitor">
 /// The monitor forwarded unchanged to the overload that takes an explicit type.
 /// This argument can be a <c>null</c> value.
 /// </param>
 /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
 public static TokenNameFinderModel Train(
     string languageCode,
     IObjectStream<NameSample> samples,
     TrainingParameters parameters,
     TokenNameFinderFactory factory,
     Monitor monitor) {

     // Delegate to the overload that takes an explicit type.
     return Train(
         languageCode,
         DefaultType,
         samples,
         parameters,
         factory,
         monitor);
 }
Example #11
0
        /// <summary>
        /// Initializes a new instance of the <see cref="TokenNameFinderModel" /> class
        /// from a maxent model and a beam size.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="nameFinderModel">The name finder maxent model.</param>
        /// <param name="beamSize">The beam size, stored in the manifest as an invariant-culture string.</param>
        /// <param name="generatorDescriptor">The generator descriptor.</param>
        /// <param name="resources">The resources.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="sequenceCodec">The sequence codec.</param>
        /// <param name="factory">The tool factory.</param>
        /// <exception cref="InvalidOperationException">
        /// <c>IsModelValid</c> rejects the given <paramref name="nameFinderModel"/>.
        /// </exception>
        public TokenNameFinderModel(
            string languageCode,
            IMaxentModel nameFinderModel,
            int beamSize,
            byte[] generatorDescriptor,
            Dictionary<string, object> resources,
            Dictionary<string, string> manifestInfoEntries,
            ISequenceCodec<string> sequenceCodec,
            TokenNameFinderFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory) {

            // Record the beam size in the manifest, formatted culture-independently.
            string beamSizeText = beamSize.ToString(CultureInfo.InvariantCulture);
            Manifest[Parameters.BeamSize] = beamSizeText;

            Init(nameFinderModel, generatorDescriptor, resources, sequenceCodec);

            bool modelIsValid = IsModelValid(nameFinderModel);
            if (!modelIsValid) {
                throw new InvalidOperationException("Model not compatible with name finder!");
            }
        }