Пример #1
0
        private static void Train()
        {
            Logger.LogFile = "RNNSharpConsole.log";

            if (File.Exists(strTagFile) == false)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} isn't existed.", strTagFile);
                UsageTrain();
                return;
            }

            //Load tag id and its name from file
            TagSet tagSet = new TagSet(strTagFile);

            //Create configuration instance and set parameters
            ModelSetting RNNConfig = new ModelSetting();
            RNNConfig.ModelFile = strModelFile;
            RNNConfig.NumHidden = layersize;
            RNNConfig.IsCRFTraining = (iCRF == 1) ? true : false;
            RNNConfig.ModelDirection = iDir;
            RNNConfig.ModelType = modelType;
            RNNConfig.MaxIteration = maxIter;
            RNNConfig.SaveStep = savestep;
            RNNConfig.LearningRate = alpha;
            RNNConfig.Dropout = dropout;
            RNNConfig.Bptt = bptt;

            //Dump RNN setting on console
            RNNConfig.DumpSetting();

            if (File.Exists(strFeatureConfigFile) == false)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.", strFeatureConfigFile);
                UsageTrain();
                return;
            }
            //Create feature extractors and load word embedding data from file
            Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
            featurizer.ShowFeatureSize();

            if (featurizer.IsRunTimeFeatureUsed() == true && iDir == 1)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: Run time feature is not available for bi-directional RNN model.");
                UsageTrain();
                return;
            }

            if (File.Exists(strTrainFile) == false)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus doesn't exist.");
                UsageTrain();
                return;
            }

            if (File.Exists(strValidFile) == false)
            {
                Logger.WriteLine(Logger.Level.err, "FAILED: The validation corpus doesn't exist.");
                UsageTrain();
                return;
            }

            //Create RNN encoder and save necessary parameters
            RNNEncoder encoder = new RNNEncoder(RNNConfig);

            //LoadFeatureConfig training corpus and extract feature set
            encoder.TrainingSet = new DataSet(tagSet.GetSize());
            LoadDataset(strTrainFile, featurizer, encoder.TrainingSet);

            //LoadFeatureConfig validated corpus and extract feature set
            encoder.ValidationSet = new DataSet(tagSet.GetSize());
            LoadDataset(strValidFile, featurizer, encoder.ValidationSet);

            if (iCRF == 1)
            {
                Logger.WriteLine(Logger.Level.info, "Initialize output tag bigram transition probability...");
                //Build tag bigram transition matrix
                encoder.TrainingSet.BuildLabelBigramTransition();
            }

            //Start to train the model
            encoder.Train();

        }
Пример #2
0
        private static void Train()
        {
            if (File.Exists(strTagFile) == false)
            {
                Console.WriteLine("FAILED: The tag mapping file {0} isn't existed.", strTagFile);
                UsageTrain();
                return;
            }

            //Load tag id and its name from file
            TagSet tagSet = new TagSet(strTagFile);

            //Create configuration instance and set parameters
            ModelSetting RNNConfig = new ModelSetting();
            RNNConfig.SetModelFile(strModelFile);
            RNNConfig.SetNumHidden(layersize);
            RNNConfig.SetCRFTraining((iCRF == 1) ? true : false);
            RNNConfig.SetDir(iDir);
            RNNConfig.SetModelType(modelType);
            RNNConfig.SetMaxIteration(maxIter);
            RNNConfig.SetSaveStep(savestep);
            RNNConfig.SetLearningRate(alpha);
            RNNConfig.SetRegularization(beta);
            RNNConfig.SetBptt(bptt);

            //Dump RNN setting on console
            RNNConfig.DumpSetting();

            if (File.Exists(strFeatureConfigFile) == false)
            {
                Console.WriteLine("FAILED: The feature configuration file {0} isn't existed.", strFeatureConfigFile);
                UsageTrain();
                return;
            }
            //Create feature extractors and load word embedding data from file
            Featurizer featurizer = new Featurizer(strFeatureConfigFile, tagSet);
            featurizer.ShowFeatureSize();

            if (File.Exists(strTrainFile) == false)
            {
                Console.WriteLine("FAILED: The training corpus {0} isn't existed.", strTrainFile);
                UsageTrain();
                return;
            }

            //LoadFeatureConfig training corpus and extract feature set
            DataSet dataSetTrain = new DataSet(tagSet.GetSize());
            LoadDataset(strTrainFile, featurizer, dataSetTrain);

            if (File.Exists(strValidFile) == false)
            {
                Console.WriteLine("FAILED: The validated corpus {0} isn't existed.", strValidFile);
                UsageTrain();
                return;
            }

            //LoadFeatureConfig validated corpus and extract feature set
            DataSet dataSetValidation = new DataSet(tagSet.GetSize());
            LoadDataset(strValidFile, featurizer, dataSetValidation);

            //Create RNN encoder and save necessary parameters
            RNNEncoder encoder = new RNNEncoder(RNNConfig);
            encoder.SetTrainingSet(dataSetTrain);
            encoder.SetValidationSet(dataSetValidation);

            if (iCRF == 1)
            {
                Console.WriteLine("Initialize output tag bigram transition probability...");
                //Build tag bigram transition matrix
                dataSetTrain.BuildLabelBigramTransition();
                encoder.SetLabelBigramTransition(dataSetTrain.GetLabelBigramTransition());
            }

            //Start to train the model
            encoder.Train();
        }