/// <summary>
        /// Determines whether the specified train parameters are valid.
        /// </summary>
        /// <param name="trainParams">The train parameters.</param>
        /// <returns><c>true</c> if the specified train parameters are valid; otherwise, <c>false</c>.</returns>
        public static bool IsValid(TrainingParameters trainParams)
        {
            // Let the parameter object run its own validation first.
            if (!trainParams.IsValid())
            {
                return false;
            }

            var algorithmName = trainParams.Get(Parameters.Algorithm);

            // The algorithm is optional: when it is absent GetTrainerType still resolves a trainer type.
            // Only consult the built-in registry with a non-null key, because a dictionary lookup
            // with a null key throws instead of answering "not found".
            var isBuiltIn = algorithmName != null && builtInTrainers.ContainsKey(algorithmName);

            if (!isBuiltIn && GetTrainerType(trainParams) == null)
            {
                return false;
            }

            // The data indexer is optional as well, but when it is present it must be a known one.
            var dataIndexer = trainParams.Get(Parameters.DataIndexer);

            if (dataIndexer != null)
            {
                switch (dataIndexer)
                {
                    case Parameters.DataIndexers.OnePass:
                    case Parameters.DataIndexers.TwoPass:
                        break;

                    default:
                        return false;
                }
            }

            return true;
        }
        /// <summary>
        /// Gets the event trainer.
        /// </summary>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="reportMap">The report map.</param>
        /// <param name="monitor">An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The <see cref="IEventTrainer" /> trainer object, or <c>null</c> if the configured algorithm is not an event model trainer.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// Unable to retrieve the trainer from the training parameters.
        /// or
        /// The trainer type does not provide a public constructor that takes a <see cref="Monitor"/> argument.
        /// </exception>
        public static IEventTrainer GetEventTrainer(TrainingParameters parameters, Dictionary<string, string> reportMap, Monitor monitor)
        {
            var algorithmName = parameters.Get(Parameters.Algorithm);

            // No algorithm configured: fall back to the GIS trainer.
            if (algorithmName == null)
            {
                AbstractEventTrainer gis = new GIS(monitor);
                gis.Init(parameters, reportMap);
                return gis;
            }

            // An algorithm that does not resolve to an event model trainer yields no trainer at all.
            var resolvedKind = GetTrainerType(parameters);
            if (!resolvedKind.HasValue || resolvedKind.Value != TrainerType.EventModelTrainer)
            {
                return null;
            }

            var implementation = GetTrainer(algorithmName);
            if (implementation == null)
            {
                throw new InvalidOperationException("Unable to retrieve the trainer from the training parameters.");
            }

            // The trainer is instantiated through its constructor that accepts a single Monitor.
            var monitorCtor = implementation.GetConstructor(new[] { typeof(Monitor) });
            if (monitorCtor == null)
            {
                throw new InvalidOperationException("The constructor of the trainer must have a standard constructor.");
            }

            var eventTrainer = (IEventTrainer)monitorCtor.Invoke(new object[] { monitor });
            eventTrainer.Init(parameters, reportMap);
            return eventTrainer;
        }
        /// <summary>
        /// Gets the sequence model trainer.
        /// </summary>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="reportMap">The report map.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The <see cref="ISequenceTrainer"/> trainer object.</returns>
        /// <exception cref="System.InvalidOperationException">Trainer type couldn't be determined!</exception>
        public static ISequenceTrainer GetSequenceModelTrainer(TrainingParameters parameters, Dictionary <string, string> reportMap, Monitor monitor)
        {
            var trainerType = parameters.Get(Parameters.Algorithm);

            ISequenceTrainer trainer = null;

            if (trainerType != null)
            {
                // Built-in trainers take precedence, matching how GetTrainerType resolves the algorithm.
                if (builtInTrainers.ContainsKey(trainerType))
                {
                    trainer = CreateBuiltinTrainer <ISequenceTrainer>(trainerType, monitor);
                }

                // Only fall back to the custom registry when no built-in trainer was produced, so a name
                // registered in both places never builds a built-in trainer just to throw it away.
                if (trainer == null && customTrainers.ContainsKey(trainerType))
                {
                    trainer = CreateCustomTrainer <ISequenceTrainer>(trainerType, monitor);
                }
            }

            // Missing algorithm, unknown algorithm, or a factory that produced nothing.
            if (trainer == null)
            {
                throw new InvalidOperationException("Trainer type couldn't be determined!");
            }

            trainer.Init(parameters, reportMap);
            return trainer;
        }
Exemple #4
0
        /// <summary>
        /// Trains a name finder model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor)
        {
            var beam     = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifest = new Dictionary<string, string>();
            var kind     = TrainerFactory.GetTrainerType(parameters);

            // Each supported trainer kind fills in exactly one of these two models.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (kind == TrainerType.EventModelTrainer)
            {
                var contextGenerator = factory.CreateContextGenerator();
                var codec            = factory.CreateSequenceCodec();
                var events           = new NameFinderEventStream(samples, type, contextGenerator, codec);

                maxentModel = TrainerFactory.GetEventTrainer(parameters, manifest, monitor).Train(events);
            }
            else if (kind == TrainerType.EventModelSequenceTrainer)
            {
                var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

                maxentModel = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor).Train(sequences);
            }
            else if (kind == TrainerType.SequenceTrainer)
            {
                var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

                sequenceModel = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor).Train(sequences);
            }
            else
            {
                throw new InvalidOperationException("Unexpected trainer type!");
            }

            // Without a sequence classification model, package the maxent model (which needs the beam size).
            if (sequenceModel == null)
            {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentModel,
                    beam,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifest,
                    factory.SequenceCodec);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceModel,
                factory.FeatureGenerator,
                factory.Resources,
                manifest,
                factory.SequenceCodec);
        }
Exemple #5
0
        /// <summary>
        /// Gets the parameter from the train parameters.
        /// </summary>
        /// <param name="key">The param key.</param>
        /// <param name="defaultValue">The default value.</param>
        /// <returns>A value or the <paramref name="defaultValue"/>.</returns>
        protected string GetStringParam(string key, string defaultValue)
        {
            var resolved = trainParams.Get(key);
            if (resolved == null)
            {
                resolved = defaultValue;
            }

            // Nothing to record when no report map is attached.
            if (reportMap == null)
            {
                return resolved;
            }

            // Mirror the effective value (configured or default) into the report map.
            reportMap[key] = resolved;
            return resolved;
        }
        /// <summary>
        /// Trains a lemmatizer model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The lemmatizer factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training
        /// operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="LemmatizerModel" /> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static LemmatizerModel Train(string languageCode, IObjectStream<LemmaSample> samples, TrainingParameters parameters, LemmatizerFactory factory, Monitor monitor)
        {
            var manifest         = new Dictionary<string, string>();
            var beam             = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var contextGenerator = factory.GetContextGenerator();

            var kind = TrainerFactory.GetTrainerType(parameters);
            if (kind == null)
            {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            // Each supported trainer kind fills in exactly one of these two models.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (kind == TrainerType.EventModelTrainer)
            {
                var events = new LemmaSampleEventStream(samples, contextGenerator);

                maxentModel = TrainerFactory.GetEventTrainer(parameters, manifest, monitor).Train(events);
            }
            else if (kind == TrainerType.EventModelSequenceTrainer)
            {
                var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);

                maxentModel = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor).Train(sequences);
            }
            else if (kind == TrainerType.SequenceTrainer)
            {
                var sequences = new LemmaSampleSequenceStream(samples, contextGenerator);

                sequenceModel = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor).Train(sequences);
            }
            else
            {
                throw new NotSupportedException("Trainer type is not supported.");
            }

            // Prefer the maxent model when one was trained; otherwise package the sequence model.
            if (maxentModel != null)
            {
                return new LemmatizerModel(languageCode, maxentModel, beam, manifest, factory);
            }

            return new LemmatizerModel(languageCode, sequenceModel, manifest, factory);
        }
Exemple #7
0
        /// <summary>
        /// Trains a chunker model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory, Monitor monitor)
        {
            var kind = TrainerFactory.GetTrainerType(parameters);
            if (kind == null)
            {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            var manifest = new Dictionary<string, string>();

            // Each supported trainer kind fills in exactly one of these two models.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (kind == TrainerType.SequenceTrainer)
            {
                var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                var sequences = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

                sequenceModel = sequenceTrainer.Train(sequences);
            }
            else if (kind == TrainerType.EventModelTrainer)
            {
                var events = new ChunkerEventStream(samples, factory.GetContextGenerator());

                maxentModel = TrainerFactory.GetEventTrainer(parameters, manifest, monitor).Train(events);
            }
            else
            {
                throw new NotSupportedException("Trainer type is not supported.");
            }

            // The beam size is read after training, whichever model was produced.
            var beam = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            if (maxentModel != null)
            {
                return new ChunkerModel(languageCode, maxentModel, beam, manifest, factory);
            }

            return new ChunkerModel(languageCode, sequenceModel, manifest, factory);
        }
        /// <summary>
        /// Gets the type of the trainer from the <see cref="TrainingParameters"/> object.
        /// </summary>
        /// <param name="trainParams">The train parameters.</param>
        /// <returns>A nullable <see cref="TrainerType"/> value.</returns>
        public static TrainerType? GetTrainerType(TrainingParameters trainParams)
        {
            var algorithmName = trainParams.Get(Parameters.Algorithm);

            // No algorithm configured: report the event model trainer type.
            if (algorithmName == null)
            {
                return TrainerType.EventModelTrainer;
            }

            // Built-in registrations are consulted before custom ones; an unknown name leaves the type null.
            Type implementation = builtInTrainers.ContainsKey(algorithmName)
                ? builtInTrainers[algorithmName]
                : customTrainers.ContainsKey(algorithmName)
                    ? customTrainers[algorithmName]
                    : null;

            // The Type-based overload maps the implementation to its trainer kind.
            return GetTrainerType(implementation);
        }
        /// <summary>
        /// Gets the event model sequence trainer.
        /// </summary>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="reportMap">The report map.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The <see cref="IEventModelSequenceTrainer"/> trainer object.</returns>
        /// <exception cref="System.InvalidOperationException">Trainer type couldn't be determined!</exception>
        public static IEventModelSequenceTrainer GetEventModelSequenceTrainer(TrainingParameters parameters, Dictionary<string, string> reportMap, Monitor monitor)
        {
            var algorithmName = parameters.Get(Parameters.Algorithm);

            // A missing or empty algorithm name cannot be mapped to any trainer.
            if (string.IsNullOrEmpty(algorithmName))
            {
                throw new InvalidOperationException("Trainer type couldn't be determined!");
            }

            IEventModelSequenceTrainer sequenceTrainer;

            // Built-in trainers are looked up first, then the custom registrations.
            if (builtInTrainers.ContainsKey(algorithmName))
            {
                sequenceTrainer = CreateBuiltinTrainer<IEventModelSequenceTrainer>(algorithmName, monitor);
            }
            else if (customTrainers.ContainsKey(algorithmName))
            {
                sequenceTrainer = (IEventModelSequenceTrainer)Activator.CreateInstance(customTrainers[algorithmName], monitor);
            }
            else
            {
                throw new InvalidOperationException("Trainer type couldn't be determined!");
            }

            sequenceTrainer.Init(parameters, reportMap);
            return sequenceTrainer;
        }
Exemple #10
0
        /// <summary>
        /// Gets the event trainer.
        /// </summary>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="reportMap">The report map.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The <see cref="IEventTrainer"/> trainer object, or <c>null</c> if the configured algorithm is not an event model trainer.</returns>
        public static IEventTrainer GetEventTrainer(TrainingParameters parameters, Dictionary <string, string> reportMap, Monitor monitor)
        {
            var algorithm = parameters.Get(Parameters.Algorithm);

            // No algorithm configured: fall back to the GIS trainer.
            if (algorithm == null)
            {
                AbstractEventTrainer trainer = new GIS(monitor);
                trainer.Init(parameters, reportMap);
                return trainer;
            }

            var trainerType = GetTrainerType(parameters);

            if (trainerType.HasValue && trainerType.Value == TrainerType.EventModelTrainer)
            {
                var type = GetTrainer(algorithm);

                // Fail with a descriptive error instead of letting a null type reach the reflection call.
                if (type == null)
                {
                    throw new InvalidOperationException("Unable to retrieve the trainer from the training parameters.");
                }

                // Resolve the constructor by its declared parameter type rather than by argument value:
                // the monitor is allowed to be null, and a null argument carries no type information
                // for the binder to select a constructor with.
                var ctor = type.GetConstructor(new[] { typeof(Monitor) });
                if (ctor == null)
                {
                    throw new InvalidOperationException("The constructor of the trainer must have a standard constructor.");
                }

                var trainer = (IEventTrainer)ctor.Invoke(new object[] { monitor });
                trainer.Init(parameters, reportMap);
                return trainer;
            }

            // The configured algorithm is not an event model trainer.
            return null;
        }
Exemple #11
0
        /// <summary>
        /// Creates a n-gram dictionary from the specified data stream using the specified head rule and specified cut-off.
        /// </summary>
        /// <param name="data">The data stream of parses.</param>
        /// <param name="rules">The head rules for the parses.</param>
        /// <param name="parameters">Can contain a cutoff, the minimum number of entries required for the n-gram to be saved as part of the dictionary.</param>
        /// <returns>A dictionary object.</returns>
        public static Dic BuildDictionary(IObjectStream <Parse> data, AbstractHeadRules rules, TrainingParameters parameters)
        {
            // Cutoff applied to the collected n-grams once all parses have been processed.
            // NOTE(review): "dict" looks like a parameter namespace and 5 like the fallback value - confirm against TrainingParameters.Get.
            var cutoff = parameters.Get("dict", Parameters.Cutoff, 5);
            var dict   = new NGramModel();

            Parse p;

            // Consume the stream until Read() returns null.
            while ((p = data.Read()) != null)
            {
                p.UpdateHeads(rules);
                var pWords = p.GetTagNodes();
                var words  = new string[pWords.Length];
                // Add all uni-grams: the covered text of every tag node of the parse.
                for (var wi = 0; wi < words.Length; wi++)
                {
                    words[wi] = pWords[wi].CoveredText;
                }

                dict.Add(new StringList(words), 1, 1);
                // Add tri-grams and bi-grams for the initial chunk sequence, built from the head text of each chunk.
                var chunks = CollapsePunctuation(AbstractParserEventStream.GetInitialChunks(p), rules.PunctuationTags);
                var cWords = new string[chunks.Length];
                for (var wi = 0; wi < cWords.Length; wi++)
                {
                    cWords[wi] = chunks[wi].Head.CoveredText;
                }
                dict.Add(new StringList(cWords), 2, 3);

                // Emulate reductions to produce additional n-grams.
                var ci = 0;
                while (ci < chunks.Length)
                {
                    /*
                     * if (chunks[ci].Parent == null) {
                     *  chunks[ci].Show();
                     * } */
                    if (LastChild(chunks[ci], chunks[ci].Parent, rules.PunctuationTags))
                    {
                        // Perform the reduce: walk back to the first chunk that shares the parent of chunks[ci].
                        var reduceStart = ci;
                        while (reduceStart >= 0 && Equals(chunks[reduceStart].Parent, chunks[ci].Parent))
                        {
                            reduceStart--;
                        }
                        // The loop stops one position before the first sibling, so step forward again.
                        reduceStart++;
                        // ReduceChunks receives ci by reference; whatever it stores there is discarded,
                        // because ci is reset to reduceStart on the next line.
                        chunks = ParserEventStream.ReduceChunks(chunks, ref ci, chunks[ci].Parent);
                        ci     = reduceStart;
                        if (chunks.Length != 0)
                        {
                            // Collect the head text of the chunk at ci plus up to two neighbours on each side.
                            var window = new string[5];
                            var wi     = 0;
                            if (ci - 2 >= 0)
                            {
                                window[wi++] = chunks[ci - 2].Head.CoveredText;
                            }
                            if (ci - 1 >= 0)
                            {
                                window[wi++] = chunks[ci - 1].Head.CoveredText;
                            }
                            window[wi++] = chunks[ci].Head.CoveredText;
                            if (ci + 1 < chunks.Length)
                            {
                                window[wi++] = chunks[ci + 1].Head.CoveredText;
                            }
                            if (ci + 2 < chunks.Length)
                            {
                                window[wi++] = chunks[ci + 2].Head.CoveredText;
                            }
                            // Trim the window to the entries actually filled in.
                            if (wi < 5)
                            {
                                var subWindow = new string[wi];
                                for (var swi = 0; swi < wi; swi++)
                                {
                                    subWindow[swi] = window[swi];
                                }
                                window = subWindow;
                            }
                            // Three or more words are added with the 2..3 range, exactly two words with 2..2;
                            // a single word adds nothing here.
                            if (window.Length >= 3)
                            {
                                dict.Add(new StringList(window), 2, 3);
                            }
                            else if (window.Length == 2)
                            {
                                dict.Add(new StringList(window), 2, 2);
                            }
                        }
                        ci = reduceStart - 1; //ci will be incremented at end of loop
                    }
                    ci++;
                }
            }
            // Apply the cutoff with no upper bound, then convert the n-gram model into the returned dictionary.
            dict.CutOff(cutoff, int.MaxValue);
            return(dict.ToDictionary(true));
        }
Exemple #12
0
        /// <summary>
        /// Trains a name finder model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor) {
            var beam = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifest = new Dictionary<string, string>();
            var kind = TrainerFactory.GetTrainerType(parameters);

            // Each supported trainer kind fills in exactly one of these two models.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (kind == TrainerType.EventModelTrainer) {
                var contextGenerator = factory.CreateContextGenerator();
                var codec = factory.CreateSequenceCodec();
                var events = new NameFinderEventStream(samples, type, contextGenerator, codec);

                maxentModel = TrainerFactory.GetEventTrainer(parameters, manifest, monitor).Train(events);
            } else if (kind == TrainerType.EventModelSequenceTrainer) {
                var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

                maxentModel = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor).Train(sequences);
            } else if (kind == TrainerType.SequenceTrainer) {
                var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

                sequenceModel = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor).Train(sequences);
            } else {
                throw new InvalidOperationException("Unexpected trainer type!");
            }

            // Without a sequence classification model, package the maxent model (which needs the beam size).
            if (sequenceModel == null) {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentModel,
                    beam,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifest,
                    factory.SequenceCodec,
                    factory);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceModel,
                factory.FeatureGenerator,
                factory.Resources,
                manifest,
                factory.SequenceCodec,
                factory);
        }
        /// <summary>
        /// Creates a n-gram dictionary from the specified data stream using the specified head rule and specified cut-off.
        /// </summary>
        /// <param name="data">The data stream of parses.</param>
        /// <param name="rules">The head rules for the parses.</param>
        /// <param name="parameters">Can contain a cutoff, the minimum number of entries required for the n-gram to be saved as part of the dictionary.</param>
        /// <returns>A dictionary object.</returns>
        public static Dic BuildDictionary(IObjectStream<Parse> data, AbstractHeadRules rules, TrainingParameters parameters) {
            // Cutoff applied to the collected n-grams once all parses have been processed.
            // NOTE(review): "dict" looks like a parameter namespace and 5 like the fallback value - confirm against TrainingParameters.Get.
            var cutoff = parameters.Get("dict", Parameters.Cutoff, 5);
            var dict = new NGramModel();

            Parse p;
            // Consume the stream until Read() returns null.
            while ((p = data.Read()) != null) {
                p.UpdateHeads(rules);
                var pWords = p.GetTagNodes();
                var words = new string[pWords.Length];
                // Add all uni-grams: the covered text of every tag node of the parse.
                for (var wi = 0; wi < words.Length; wi++) {
                    words[wi] = pWords[wi].CoveredText;
                }

                dict.Add(new StringList(words), 1, 1);
                // Add tri-grams and bi-grams for the initial chunk sequence, built from the head text of each chunk.
                var chunks = CollapsePunctuation(AbstractParserEventStream.GetInitialChunks(p), rules.PunctuationTags);
                var cWords = new string[chunks.Length];
                for (var wi = 0; wi < cWords.Length; wi++) {
                    cWords[wi] = chunks[wi].Head.CoveredText;
                }
                dict.Add(new StringList(cWords), 2, 3);

                // Emulate reductions to produce additional n-grams.
                var ci = 0;
                while (ci < chunks.Length) {
                    /*
                    if (chunks[ci].Parent == null) {
                        chunks[ci].Show();
                    } */
                    if (LastChild(chunks[ci], chunks[ci].Parent, rules.PunctuationTags)) {
                        // Perform the reduce: walk back to the first chunk that shares the parent of chunks[ci].
                        var reduceStart = ci;
                        while (reduceStart >= 0 && Equals(chunks[reduceStart].Parent, chunks[ci].Parent)) {
                            reduceStart--;
                        }
                        // The loop stops one position before the first sibling, so step forward again.
                        reduceStart++;
                        // ReduceChunks receives ci by reference; whatever it stores there is discarded,
                        // because ci is reset to reduceStart on the next line.
                        chunks = ParserEventStream.ReduceChunks(chunks, ref ci, chunks[ci].Parent);
                        ci = reduceStart;
                        if (chunks.Length != 0) {
                            // Collect the head text of the chunk at ci plus up to two neighbours on each side.
                            var window = new string[5];
                            var wi = 0;
                            if (ci - 2 >= 0) window[wi++] = chunks[ci - 2].Head.CoveredText;
                            if (ci - 1 >= 0) window[wi++] = chunks[ci - 1].Head.CoveredText;
                            window[wi++] = chunks[ci].Head.CoveredText;
                            if (ci + 1 < chunks.Length) window[wi++] = chunks[ci + 1].Head.CoveredText;
                            if (ci + 2 < chunks.Length) window[wi++] = chunks[ci + 2].Head.CoveredText;
                            // Trim the window to the entries actually filled in.
                            if (wi < 5) {
                                var subWindow = new string[wi];
                                for (var swi = 0; swi < wi; swi++) {
                                    subWindow[swi] = window[swi];
                                }
                                window = subWindow;
                            }
                            // Three or more words are added with the 2..3 range, exactly two words with 2..2;
                            // a single word adds nothing here.
                            if (window.Length >= 3) {
                                dict.Add(new StringList(window), 2, 3);
                            } else if (window.Length == 2) {
                                dict.Add(new StringList(window), 2, 2);
                            }
                        }
                        ci = reduceStart - 1; //ci will be incremented at end of loop
                    }
                    ci++;
                }
            }
            // Apply the cutoff with no upper bound, then convert the n-gram model into the returned dictionary.
            dict.CutOff(cutoff, int.MaxValue);
            return dict.ToDictionary(true);
        }
Exemple #14
0
        /// <summary>
        /// Trains a chunker model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages emitted during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory, Monitor monitor) {
            var kind = TrainerFactory.GetTrainerType(parameters);
            if (kind == null) {
                throw new InvalidOperationException("The trainer was not specified.");
            }

            var manifest = new Dictionary<string, string>();

            // Each supported trainer kind fills in exactly one of these two models.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            if (kind == TrainerType.SequenceTrainer) {
                var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                var sequences = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());

                sequenceModel = sequenceTrainer.Train(sequences);
            } else if (kind == TrainerType.EventModelTrainer) {
                var events = new ChunkerEventStream(samples, factory.GetContextGenerator());

                maxentModel = TrainerFactory.GetEventTrainer(parameters, manifest, monitor).Train(events);
            } else {
                throw new NotSupportedException("Trainer type is not supported.");
            }

            // The beam size is read after training, whichever model was produced.
            var beam = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

            if (maxentModel != null) {
                return new ChunkerModel(languageCode, maxentModel, beam, manifest, factory);
            }

            return new ChunkerModel(languageCode, sequenceModel, manifest, factory);
        }