Beispiel #1
0
        /// <summary>
        /// Runs training mode: checks that the required input files exist, builds the model
        /// configuration from the static command-line option fields, loads the training
        /// (and optional validation) corpus and then starts the encoder.
        /// On any validation failure an error is logged, the usage text is printed and the
        /// method returns without training.
        /// </summary>
        private static void Train()
        {
            Logger.LogFile = "RNNSharpConsole.log";

            // Check every required input file before loading anything, so a wrong path is
            // reported immediately instead of after the tag set and feature data were read.
            if (!File.Exists(strTagFile))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} doesn't exist.", strTagFile);
                UsageTrain();
                return;
            }

            if (!File.Exists(strFeatureConfigFile))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.", strFeatureConfigFile);
                UsageTrain();
                return;
            }

            if (!File.Exists(strTrainFile))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus {0} doesn't exist.", strTrainFile);
                UsageTrain();
                return;
            }

            //Load tag id and its name from file
            var tagSet = new TagSet(strTagFile);

            // Evaluated once; used both for the configuration and for the bigram step below.
            bool isCrfTraining = iCRF == 1;

            //Create configuration instance and set parameters
            var rnnConfig = new ModelSetting
            {
                TagFile             = strTagFile,
                Tags                = tagSet,
                ModelFile           = strModelFile,
                HiddenLayerSizeList = hiddenLayerSizeList,
                IsCRFTraining       = isCrfTraining,
                ModelDirection      = iDir,
                VQ                  = iVQ,
                ModelType           = ParseLayerType(hiddenLayerType),
                OutputLayerType     = ParseLayerType(outputLayerType),
                MaxIteration        = maxIter,
                SaveStep            = savestep,
                LearningRate        = alpha,
                Dropout             = dropout,
                Bptt                = bptt,
                GradientCutoff      = gradientCutoff,
                NCESampleSize       = nceSampleSize
            };

            //Dump RNN setting on console
            rnnConfig.DumpSetting();

            //Create feature extractors and load word embedding data from file
            var featurizer = new Featurizer(strFeatureConfigFile, tagSet);
            featurizer.ShowFeatureSize();

            // iDir == 1 is treated as the bi-directional model here (see the error text).
            if (featurizer.IsRunTimeFeatureUsed() && iDir == 1)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: Run time feature is not available for bi-directional RNN model.");
                UsageTrain();
                return;
            }

            //Create RNN encoder and save necessary parameters
            var encoder = new RNNEncoder(rnnConfig);

            //Load training corpus and extract feature set
            encoder.TrainingSet = new DataSet(tagSet.GetSize());
            LoadDataset(strTrainFile, featurizer, encoder.TrainingSet);
            rnnConfig.TrainDataSet = encoder.TrainingSet;

            if (!String.IsNullOrEmpty(strValidFile))
            {
                //Load validated corpus and extract feature set
                Logger.WriteLine("Loading validated corpus from {0}", strValidFile);
                encoder.ValidationSet = new DataSet(tagSet.GetSize());
                LoadDataset(strValidFile, featurizer, encoder.ValidationSet);
            }
            else
            {
                Logger.WriteLine("Validated corpus isn't specified.");
                encoder.ValidationSet = null;
            }

            if (isCrfTraining)
            {
                Logger.WriteLine("Initialize output tag bigram transition probability...");
                //Build tag bigram transition matrix
                encoder.TrainingSet.BuildLabelBigramTransition();
            }

            //Start to train the model
            encoder.Train();
        }
Beispiel #2
0
        /// <summary>
        /// Runs training mode: checks that the required input files exist, builds the model
        /// settings from the static command-line option fields, loads the feature
        /// configuration and then trains either a sequence-labeling encoder or a
        /// sequence-pair encoder, depending on the model type declared in the configuration.
        /// On any validation failure an error is logged, the usage text is printed and the
        /// method returns without training.
        /// </summary>
        private static void Train()
        {
            Logger.LogFile = "RNNSharpConsole.log";

            // Check every required input file before loading anything, so a wrong path is
            // reported immediately instead of after the tag set and configuration were read.
            // Paths are passed as format arguments (not interpolated into the format string)
            // so that a path containing '{' or '}' cannot corrupt the message formatting.
            if (!File.Exists(tagFilePath))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} doesn't exist.", tagFilePath);
                UsageTrain();
                return;
            }

            if (!File.Exists(configFilePath))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.",
                                 configFilePath);
                UsageTrain();
                return;
            }

            if (!File.Exists(trainFilePath))
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus {0} doesn't exist.", trainFilePath);
                UsageTrain();
                return;
            }

            //Load tag id and its name from file
            var tagSet = new TagSet(tagFilePath);

            //Create configuration instance and set parameters
            var rnnConfig = new ModelSetting
            {
                VQ             = iVQ,
                MaxIteration   = maxIter,
                SaveStep       = savestep,
                LearningRate   = alpha,
                GradientCutoff = gradientCutoff,
                IsConstAlpha   = constAlpha
            };

            //Dump RNN setting on console
            rnnConfig.DumpSetting();

            //Create feature extractors and load word embedding data from file
            var config = new Config(configFilePath, tagSet);
            config.ShowFeatureSize();

            bool hasValidationCorpus = !string.IsNullOrEmpty(validFilePath);

            if (config.ModelType == MODELTYPE.SeqLabel)
            {
                //Create RNN encoder and save necessary parameters
                var encoder = new RNNEncoder<Sequence>(rnnConfig, config)
                {
                    TrainingSet = new DataSet<Sequence>(tagSet.GetSize())
                };

                //Load training corpus and extract feature set
                LoadDataset(trainFilePath, config, encoder.TrainingSet);

                if (hasValidationCorpus)
                {
                    //Load validated corpus and extract feature set
                    Logger.WriteLine("Loading validated corpus from {0}", validFilePath);
                    encoder.ValidationSet = new DataSet<Sequence>(tagSet.GetSize());
                    LoadDataset(validFilePath, config, encoder.ValidationSet);
                }
                else
                {
                    Logger.WriteLine("Validated corpus isn't specified.");
                    encoder.ValidationSet = null;
                }

                if (encoder.IsCRFTraining)
                {
                    Logger.WriteLine("Initialize output tag bigram transition probability...");
                    //Build tag bigram transition matrix
                    encoder.TrainingSet.BuildLabelBigramTransition();
                }

                //Start to train the model
                encoder.Train();
            }
            else
            {
                // Every model type other than SeqLabel is trained on sequence pairs.
                //Create RNN encoder and save necessary parameters
                var encoder = new RNNEncoder<SequencePair>(rnnConfig, config)
                {
                    TrainingSet = new DataSet<SequencePair>(tagSet.GetSize())
                };

                //Load training corpus and extract feature set
                LoadSeq2SeqDataSet(trainFilePath, config, encoder.TrainingSet);

                if (hasValidationCorpus)
                {
                    //Load validated corpus and extract feature set
                    Logger.WriteLine("Loading validated corpus from {0}", validFilePath);
                    encoder.ValidationSet = new DataSet<SequencePair>(tagSet.GetSize());
                    LoadSeq2SeqDataSet(validFilePath, config, encoder.ValidationSet);
                }
                else
                {
                    Logger.WriteLine("Validated corpus isn't specified.");
                    encoder.ValidationSet = null;
                }

                if (encoder.IsCRFTraining)
                {
                    Logger.WriteLine("Initialize output tag bigram transition probability...");
                    //Build tag bigram transition matrix
                    encoder.TrainingSet.BuildLabelBigramTransition();
                }

                //Start to train the model
                encoder.Train();
            }
        }