コード例 #1
0
 public ParserEventStream(IObjectStream <Parse> d, AbstractHeadRules rules, ParserEventTypeEnum type)
     : base(d, rules, type)
 {
     buildContextGenerator  = new BuildContextGenerator();
     attachContextGenerator = new AttachContextGenerator(Punctuation);
     checkContextGenerator  = new CheckContextGenerator(Punctuation);
 }
コード例 #2
0
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel,
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker,
            AbstractHeadRules headRules,
            int beamSize,
            double advancePercentage) : base(tagger, chunker, headRules, beamSize, advancePercentage)
        {
            this.buildModel  = buildModel;
            this.attachModel = attachModel;
            this.checkModel  = checkModel;

            buildContextGenerator  = new BuildContextGenerator();
            attachContextGenerator = new AttachContextGenerator(punctSet);
            checkContextGenerator  = new CheckContextGenerator(punctSet);

            bProbs = new double[buildModel.GetNumOutcomes()];
            aProbs = new double[attachModel.GetNumOutcomes()];
            cProbs = new double[checkModel.GetNumOutcomes()];

            doneIndex           = buildModel.GetIndex(DONE);
            sisterAttachIndex   = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
            attachments   = new[] { daughterAttachIndex, sisterAttachIndex };
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
コード例 #3
0
ファイル: Parser.cs プロジェクト: qooba/SharpNL
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
                AbstractHeadRules headRules, int beamSize, double advancePercentage) :
     base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
     this.buildModel       = buildModel;
     this.checkModel       = checkModel;
     bProbs                = new double[buildModel.GetNumOutcomes()];
     cProbs                = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap          = new Dictionary <string, string>();
     contTypeMap           = new Dictionary <string, string>();
     for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++)
     {
         var outcome = buildModel.GetOutcome(boi);
         if (outcome.StartsWith(START))
         {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         }
         else if (outcome.StartsWith(CONT))
         {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }
     topStartIndex   = buildModel.GetIndex(TOP_START);
     completeIndex   = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
コード例 #4
0
ファイル: Parser.cs プロジェクト: qooba/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters)
        {
            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary <string, string>();

            // build
            //System.err.println("Training builder");
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);


            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");

            if (!posTaggerParams.Contains(Parameters.BeamSize))
            {
                posTaggerParams.Set(Parameters.BeamSize, "10");
            }


            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                                             parameters.GetNamespace("tagger"), new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode,
                                             new ChunkSampleStream(samples),
                                             parameters.GetNamespace("chunker"),
                                             new ParserChunkerFactory());

            samples.Reset();

            // check
            //System.err.println("Training checker");
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();
            var checkTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return(new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries));
        }
コード例 #5
0
ファイル: Parser.cs プロジェクト: qooba/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="iterations">The number of training iterations.</param>
        /// <param name="cutoff">The min number of times a feature must be seen.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            int iterations,
            int cutoff)
        {
            var param = new TrainingParameters();

            param.Set("dict", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));

            param.Set("tagger", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("tagger", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            param.Set("chunker", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("chunker", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            param.Set("check", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("check", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            param.Set("build", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("build", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            return(Train(monitor, languageCode, samples, rules, param));
        }
コード例 #6
0
        public ParserEventStream(IObjectStream<Parse> d, AbstractHeadRules rules, ParserEventTypeEnum type)
            : base(d, rules, type) {

            buildContextGenerator = new BuildContextGenerator();
            attachContextGenerator = new AttachContextGenerator(Punctuation);
            checkContextGenerator = new CheckContextGenerator(Punctuation);
        }
コード例 #7
0
 /// <summary>
 /// Trains a parser model with the given parameters.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="rules">The head rules.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <returns>The trained <see cref="ParserModel"/> object.</returns>
 /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
 public static ParserModel Train(
     string languageCode,
     IObjectStream <Parse> samples,
     AbstractHeadRules rules,
     TrainingParameters parameters)
 {
     return(Train(languageCode, samples, rules, parameters, null));
 }
コード例 #8
0
ファイル: Parser.cs プロジェクト: qooba/SharpNL
 /// <summary>
 /// Trains a parser model with the given parameters.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="rules">The head rules.</param>
 /// <param name="iterations">The number of training iterations.</param>
 /// <param name="cutoff">The min number of times a feature must be seen.</param>
 /// <returns>The trained <see cref="ParserModel" /> object.</returns>
 public static ParserModel Train(
     string languageCode,
     IObjectStream <Parse> samples,
     AbstractHeadRules rules,
     int iterations,
     int cutoff)
 {
     return(Train(null, languageCode, samples, rules, iterations, cutoff));
 }
コード例 #9
0
        public static void Serialize(AbstractHeadRules headRules, Stream outputStream) {
            if (headRules == null)
                throw new ArgumentNullException("headRules");

            if (outputStream == null)
                throw new ArgumentNullException("outputStream");

            if (!outputStream.CanWrite)
                throw new ArgumentException(@"Stream was not writable.", "outputStream");


            headRules.Serialize(new StreamWriter(outputStream, Encoding.UTF8));
        }
コード例 #10
0
        /// <summary>
        /// Create an event stream based on the specified data stream of the specified type using the specified head rules.
        /// </summary>
        /// <param name="samples">A 1-parse-per-line Penn Treebank Style parse.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="eType">The type of events desired (tag, chunk, build, or check).</param>
        /// <param name="dictionary">A tri-gram dictionary to reduce feature generation.</param>
        public ParserEventStream(IObjectStream<Parse> samples, AbstractHeadRules rules, ParserEventTypeEnum eType,
            Dictionary.Dictionary dictionary)
            : base(samples, rules, eType, dictionary) {

            switch (eType) {
                case ParserEventTypeEnum.Build:
                    bcg = new BuildContextGenerator(dictionary);
                    break;
                case ParserEventTypeEnum.Check:
                    kcg = new CheckContextGenerator();
                    break;
            }

        }
コード例 #11
0
ファイル: ParserEventStream.cs プロジェクト: qooba/SharpNL
        /// <summary>
        /// Create an event stream based on the specified data stream of the specified type using the specified head rules.
        /// </summary>
        /// <param name="samples">A 1-parse-per-line Penn Treebank Style parse.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="eType">The type of events desired (tag, chunk, build, or check).</param>
        /// <param name="dictionary">A tri-gram dictionary to reduce feature generation.</param>
        public ParserEventStream(IObjectStream <Parse> samples, AbstractHeadRules rules, ParserEventTypeEnum eType,
                                 Dictionary.Dictionary dictionary)
            : base(samples, rules, eType, dictionary)
        {
            switch (eType)
            {
            case ParserEventTypeEnum.Build:
                bcg = new BuildContextGenerator(dictionary);
                break;

            case ParserEventTypeEnum.Check:
                kcg = new CheckContextGenerator();
                break;
            }
        }
コード例 #12
0
        public static void Serialize(AbstractHeadRules headRules, Stream outputStream)
        {
            if (headRules == null)
            {
                throw new ArgumentNullException(nameof(headRules));
            }

            if (outputStream == null)
            {
                throw new ArgumentNullException(nameof(outputStream));
            }

            if (!outputStream.CanWrite)
            {
                throw new ArgumentException(@"Stream was not writable.", nameof(outputStream));
            }


            headRules.Serialize(new StreamWriter(outputStream, Encoding.UTF8));
        }
コード例 #13
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
 private Parser(IMaxentModel buildModel, IMaxentModel checkModel, IPOSTagger tagger, IChunker chunker,
     AbstractHeadRules headRules, int beamSize, double advancePercentage) :
         base(tagger, chunker, headRules, beamSize, advancePercentage) {
     this.buildModel = buildModel;
     this.checkModel = checkModel;
     bProbs = new double[buildModel.GetNumOutcomes()];
     cProbs = new double[checkModel.GetNumOutcomes()];
     buildContextGenerator = new BuildContextGenerator();
     checkContextGenerator = new CheckContextGenerator();
     startTypeMap = new Dictionary<string, string>();
     contTypeMap = new Dictionary<string, string>();
     for (int boi = 0, bon = buildModel.GetNumOutcomes(); boi < bon; boi++) {
         var outcome = buildModel.GetOutcome(boi);
         if (outcome.StartsWith(START)) {
             startTypeMap[outcome] = outcome.Substring(START.Length);
         } else if (outcome.StartsWith(CONT)) {
             contTypeMap[outcome] = outcome.Substring(CONT.Length);
         }
     }
     topStartIndex = buildModel.GetIndex(TOP_START);
     completeIndex = checkModel.GetIndex(COMPLETE);
     incompleteIndex = checkModel.GetIndex(INCOMPLETE);
 }
コード例 #14
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="iterations">The number of training iterations.</param>
        /// <param name="cutoff">The min number of times a feature must be seen.</param>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(string languageCode, IObjectStream<Parse> samples, AbstractHeadRules rules, int iterations, int cutoff, Monitor monitor) {

            var param = new TrainingParameters();

            param.Set("dict", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));

            param.Set("tagger", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("tagger", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            param.Set("chunker", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("chunker", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            param.Set("check", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("check", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            param.Set("build", Parameters.Cutoff, cutoff.ToString(CultureInfo.InvariantCulture));
            param.Set("build", Parameters.Iterations, iterations.ToString(CultureInfo.InvariantCulture));

            return Train(languageCode, samples, rules, param, monitor);
        }
コード例 #15
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="iterations">The number of training iterations.</param>
        /// <param name="cutoff">The min number of times a feature must be seen.</param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(string languageCode, IObjectStream<Parse> samples, AbstractHeadRules rules,
            int iterations, int cutoff) {

            return Train(languageCode, samples, rules, iterations, cutoff, null);
        }
コード例 #16
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream<Parse> samples, 
            AbstractHeadRules rules, 
            TrainingParameters parameters,
            Monitor monitor) {

            var manifestInfoEntries = new Dictionary<string, string>();

#if DEBUG
            System.Diagnostics.Debug.Print("Building dictionary");
#endif

            var dictionary = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            // tag
            var posModel = POSTaggerME.Train(
                languageCode,
                new PosSampleStream(samples),
                parameters.GetNamespace("tagger"),
                new POSTaggerFactory(), monitor);

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(
                languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ParserChunkerFactory(), 
                monitor);

            samples.Reset();

            // build

#if DEBUG
            System.Diagnostics.Debug.Print("Training builder");
#endif

            var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dictionary);
            var buildReportMap = new Dictionary<string, string>();
            var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // check
#if DEBUG
            System.Diagnostics.Debug.Print("Training checker");
#endif
            var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary<string, string>();

            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            samples.Reset();

            // attach
#if DEBUG
            System.Diagnostics.Debug.Print("Training attacher");
#endif
            var attachEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
            var attachReportMap = new Dictionary<string, string>();

            var attachTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("attach"), attachReportMap, monitor);

            var attachModel = attachTrainer.Train(attachEvents);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach");

            return new ParserModel(
                languageCode,
                buildModel,
                checkModel,
                attachModel,
                posModel,
                chunkModel,
                rules,
                ParserType.TreeInsert,
                manifestInfoEntries);
        }
コード例 #17
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream<Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters) {

            return Train(languageCode, samples, rules, parameters, null);
        }
コード例 #18
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
        private Parser(
            IMaxentModel buildModel,
            IMaxentModel attachModel, 
            IMaxentModel checkModel,
            IPOSTagger tagger,
            IChunker chunker, 
            AbstractHeadRules headRules, 
            int beamSize, 
            double advancePercentage) : base(tagger, chunker, headRules, beamSize, advancePercentage) {

            this.buildModel = buildModel;
            this.attachModel = attachModel;
            this.checkModel = checkModel;

            buildContextGenerator = new BuildContextGenerator();
            attachContextGenerator = new AttachContextGenerator(punctSet);
            checkContextGenerator = new CheckContextGenerator(punctSet);

            bProbs = new double[buildModel.GetNumOutcomes()];
            aProbs = new double[attachModel.GetNumOutcomes()];
            cProbs = new double[checkModel.GetNumOutcomes()];

            doneIndex = buildModel.GetIndex(DONE);
            sisterAttachIndex = attachModel.GetIndex(ATTACH_SISTER);
            daughterAttachIndex = attachModel.GetIndex(ATTACH_DAUGHTER);
            // nonAttachIndex = attachModel.GetIndex(NON_ATTACH);
            attachments = new[] {daughterAttachIndex, sisterAttachIndex};
            completeIndex = checkModel.GetIndex(COMPLETE);
        }
コード例 #19
0
ファイル: Parser.cs プロジェクト: qooba/SharpNL
 /// <summary>
 /// Initializes a new instance of the <see cref="AbstractBottomUpParser"/>.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser.</param>
 /// <param name="beamSize">Size of the beam.</param>
 /// <param name="advancePercentage">The advance percentage.</param>
 public Parser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize,
               double advancePercentage)
     : base(tagger, chunker, headRules, beamSize, advancePercentage)
 {
 }
コード例 #20
0
ファイル: ParserEventStream.cs プロジェクト: qooba/SharpNL
 /// <summary>
 /// Create an event stream based on the specified data stream of the specified type using the specified head rules.
 /// </summary>
 /// <param name="samples">A 1-parse-per-line Penn Treebank Style parse.</param>
 /// <param name="rules">The head rules.</param>
 /// <param name="eType">The type of events desired (tag, chunk, build, or check).</param>
 public ParserEventStream(IObjectStream <Parse> samples, AbstractHeadRules rules, ParserEventTypeEnum eType)
     : this(samples, rules, eType, null)
 {
 }
コード例 #21
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode, 
            IObjectStream<Parse> samples, 
            AbstractHeadRules rules,
            TrainingParameters parameters) {

            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary<string, string>();

            // build
            //System.err.println("Training builder");
            var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary<string, string>();
            var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);


            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");
            if (!posTaggerParams.Contains(Parameters.BeamSize))
                posTaggerParams.Set(Parameters.BeamSize, "10");


            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                parameters.GetNamespace("tagger"), new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode, 
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ParserChunkerFactory());

            samples.Reset();

            // check
            //System.err.println("Training checker");
            var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary<string, string>();
            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);
            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
        }
コード例 #22
0
ファイル: Parser.cs プロジェクト: lovethisgame/SharpNL
 /// <summary>
 /// Initializes a new instance of the <see cref="AbstractBottomUpParser"/>.
 /// </summary>
 /// <param name="tagger">The pos-tagger that the parser uses.</param>
 /// <param name="chunker">The chunker that the parser uses to chunk non-recursive structures.</param>
 /// <param name="headRules">The head rules for the parser.</param>
 /// <param name="beamSize">Size of the beam.</param>
 /// <param name="advancePercentage">The advance percentage.</param>
 public Parser(IPOSTagger tagger, IChunker chunker, AbstractHeadRules headRules, int beamSize,
     double advancePercentage)
     : base(tagger, chunker, headRules, beamSize, advancePercentage) {}
コード例 #23
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters,
            Monitor monitor)
        {
            var manifestInfoEntries = new Dictionary <string, string>();

            System.Diagnostics.Debug.Print("Building dictionary");

            var dictionary = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            // tag
            var posModel = POSTaggerME.Train(
                languageCode,
                new PosSampleStream(samples),
                parameters.GetNamespace("tagger"),
                new POSTaggerFactory(), monitor);

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(
                languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ChunkerFactory(), monitor);

            samples.Reset();

            // build
            System.Diagnostics.Debug.Print("Training builder");
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dictionary);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // check
            System.Diagnostics.Debug.Print("Training checker");
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();

            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            samples.Reset();

            // attach
            System.Diagnostics.Debug.Print("Training attacher");
            var attachEvents    = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
            var attachReportMap = new Dictionary <string, string>();

            var attachTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("attach"), attachReportMap, monitor);

            var attachModel = attachTrainer.Train(attachEvents);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach");

            return(new ParserModel(
                       languageCode,
                       buildModel,
                       checkModel,
                       attachModel,
                       posModel,
                       chunkModel,
                       rules,
                       ParserType.TreeInsert,
                       manifestInfoEntries));
        }
コード例 #24
0
 /// <summary>
 /// Create an event stream based on the specified data stream of the specified type using the specified head rules.
 /// </summary>
 /// <param name="samples">A 1-parse-per-line Penn Treebank Style parse.</param>
 /// <param name="rules">The head rules.</param>
 /// <param name="eType">The type of events desired (tag, chunk, build, or check).</param>
 public ParserEventStream(IObjectStream<Parse> samples, AbstractHeadRules rules, ParserEventTypeEnum eType)
     : this(samples, rules, eType, null) {}