示例#1
0
文件: Parser.cs 项目: qooba/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters)
        {
            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary <string, string>();

            // build
            //System.err.println("Training builder");
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);


            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");

            if (!posTaggerParams.Contains(Parameters.BeamSize))
            {
                posTaggerParams.Set(Parameters.BeamSize, "10");
            }


            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                                             parameters.GetNamespace("tagger"), new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode,
                                             new ChunkSampleStream(samples),
                                             parameters.GetNamespace("chunker"),
                                             new ParserChunkerFactory());

            samples.Reset();

            // check
            //System.err.println("Training checker");
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();
            var checkTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return(new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries));
        }
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     if (testSampleStream != null || isPoisoned)
     {
         throw new InvalidOperationException();
     }
     index = 0;
     sampleStream.Reset();
 }
示例#3
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     if (adSentenceStream != null)
     {
         adSentenceStream.Reset();
     }
 }
        public TrainingSampleStream Next()
        {
            if (HasNext)
            {
                if (lastTrainingSampleStream != null)
                {
                    lastTrainingSampleStream.Poison();
                }

                sampleStream.Reset();

                lastTrainingSampleStream = new TrainingSampleStream(sampleStream, NumberOfPartitions, testIndex++);

                return(lastTrainingSampleStream);
            }

            throw new InvalidOperationException("No such element.");
        }
示例#5
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters,
            Monitor monitor)
        {
            var manifestInfoEntries = new Dictionary <string, string>();

            System.Diagnostics.Debug.Print("Building dictionary");

            var dictionary = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            // tag
            var posModel = POSTaggerME.Train(
                languageCode,
                new PosSampleStream(samples),
                parameters.GetNamespace("tagger"),
                new POSTaggerFactory(), monitor);

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(
                languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ChunkerFactory(), monitor);

            samples.Reset();

            // build
            System.Diagnostics.Debug.Print("Training builder");
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dictionary);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // check
            System.Diagnostics.Debug.Print("Training checker");
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();

            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            samples.Reset();

            // attach
            System.Diagnostics.Debug.Print("Training attacher");
            var attachEvents    = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
            var attachReportMap = new Dictionary <string, string>();

            var attachTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("attach"), attachReportMap, monitor);

            var attachModel = attachTrainer.Train(attachEvents);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach");

            return(new ParserModel(
                       languageCode,
                       buildModel,
                       checkModel,
                       attachModel,
                       posModel,
                       chunkModel,
                       rules,
                       ParserType.TreeInsert,
                       manifestInfoEntries));
        }
示例#6
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public virtual void Reset()
 {
     IsFinished = false;
     stream.Reset();
 }
示例#7
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream<Parse> samples, 
            AbstractHeadRules rules, 
            TrainingParameters parameters,
            Monitor monitor) {

            var manifestInfoEntries = new Dictionary<string, string>();

#if DEBUG
            System.Diagnostics.Debug.Print("Building dictionary");
#endif

            var dictionary = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            // tag
            var posModel = POSTaggerME.Train(
                languageCode,
                new PosSampleStream(samples),
                parameters.GetNamespace("tagger"),
                new POSTaggerFactory(), monitor);

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(
                languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ParserChunkerFactory(), 
                monitor);

            samples.Reset();

            // build

#if DEBUG
            System.Diagnostics.Debug.Print("Training builder");
#endif

            var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dictionary);
            var buildReportMap = new Dictionary<string, string>();
            var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // check
#if DEBUG
            System.Diagnostics.Debug.Print("Training checker");
#endif
            var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary<string, string>();

            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            samples.Reset();

            // attach
#if DEBUG
            System.Diagnostics.Debug.Print("Training attacher");
#endif
            var attachEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
            var attachReportMap = new Dictionary<string, string>();

            var attachTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("attach"), attachReportMap, monitor);

            var attachModel = attachTrainer.Train(attachEvents);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach");

            return new ParserModel(
                languageCode,
                buildModel,
                checkModel,
                attachModel,
                posModel,
                chunkModel,
                rules,
                ParserType.TreeInsert,
                manifestInfoEntries);
        }
示例#8
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode, 
            IObjectStream<Parse> samples, 
            AbstractHeadRules rules,
            TrainingParameters parameters) {

            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary<string, string>();

            // build
            //System.err.println("Training builder");
            var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary<string, string>();
            var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);


            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");
            if (!posTaggerParams.Contains(Parameters.BeamSize))
                posTaggerParams.Set(Parameters.BeamSize, "10");


            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                parameters.GetNamespace("tagger"), new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode, 
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ParserChunkerFactory());

            samples.Reset();

            // check
            //System.err.println("Training checker");
            var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary<string, string>();
            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);
            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
        }
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     lineStream.Reset();
 }
示例#10
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     psi.Reset();
 }
示例#11
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 /// <exception cref="NotSupportedException"></exception>
 public void Reset()
 {
     input.Reset();
 }
示例#12
0
 public void Reset()
 {
     samples.Reset();
 }
示例#13
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     objectStream.Reset();
 }
示例#14
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     adSentenceStream.Reset();
 }
示例#15
0
 /// <summary>
 /// Repositions the stream at the beginning and the previously seen object
 /// sequence will be repeated exactly. This method can be used to re-read the
 /// stream if multiple passes over the objects are required.
 /// </summary>
 public void Reset()
 {
     events = new List <Event>().GetEnumerator();
     samples.Reset();
 }