Example #1
0
        /// <summary>
        /// Loads the configuration file at <paramref name="configFilePath"/> and fills in the
        /// settings derived from it: working directory, model file path, layers, pre-trained
        /// model, template features, CRF flag, model type and model direction.
        /// </summary>
        /// <param name="configFilePath">Path handed to <c>ConfigUtils.LoadFile</c>.</param>
        public void LoadFeatureConfigFromFile(string configFilePath)
        {
            // Every setting below is read from this freshly loaded configuration.
            config = new ConfigUtils();
            config.LoadFile(configFilePath);

            // Working directory: the configured value, or the process' current directory when unset.
            var configuredDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
            currentDirectory = string.IsNullOrEmpty(configuredDirectory)
                ? Environment.CurrentDirectory
                : configuredDirectory;
            Logger.WriteLine($"Current directory : {currentDirectory}");

            // Main model file location, built from the working directory and the configured path.
            var modelFileSetting = config.GetValueRequired(MODEL_FILEPATH);
            ModelFilePath = GetFilePath(currentDirectory, modelFileSetting);
            Logger.WriteLine($"Main model is located at {ModelFilePath}");

            // The Set* helpers below run against this fresh, empty context map.
            featureContext = new Dictionary<string, List<int>>();

            SetHiddenLayers();
            SetOutputLayers();
            SetPretrainedModel();
            SetTFeatures();

            // CRF training stays off unless the optional setting is present.
            var crfLayerSetting = config.GetValueOptional(CRF_LAYER);
            IsCRFTraining = false;
            if (!string.IsNullOrEmpty(crfLayerSetting))
            {
                IsCRFTraining = bool.Parse(crfLayerSetting);
            }

            // Model type: "SeqLabel" (case-insensitive) selects SeqLabel; any other value selects Seq2Seq.
            var modelTypeSetting = config.GetValueRequired(MODEL_TYPE);
            if (modelTypeSetting.Equals(MODELTYPE.SeqLabel.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                ModelType = MODELTYPE.SeqLabel;
            }
            else
            {
                ModelType = MODELTYPE.Seq2Seq;
            }
            Logger.WriteLine($"Model type: {ModelType}");

            // Model direction: "Forward" (case-insensitive) selects Forward; any other value selects BiDirectional.
            var modelDirectionSetting = config.GetValueRequired(MODEL_DIRECTION);
            if (modelDirectionSetting.Equals(MODELDIRECTION.Forward.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                ModelDirection = MODELDIRECTION.Forward;
            }
            else
            {
                ModelDirection = MODELDIRECTION.BiDirectional;
            }
            Logger.WriteLine($"Model direction: {ModelDirection}");

            // Sequence-to-sequence models additionally need an auto-encoder for the source sequence.
            if (ModelType == MODELTYPE.Seq2Seq)
            {
                var encoderConfigSetting = config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG);
                var encoderConfigPath = GetFilePath(currentDirectory, encoderConfigSetting);
                Logger.WriteLine(
                    $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{encoderConfigPath}'");

                Seq2SeqAutoEncoder = InitializeAutoEncoder(encoderConfigPath);
            }

            // Final validation of the combined settings.
            CheckSettings();
        }
Example #2
0
        /// <summary>
        /// Reads the optional template feature (TFeature) settings from the loaded configuration.
        /// When a template feature file is configured, this creates the <c>TemplateFeaturizer</c>,
        /// resolves the feature weight type and registers the context offsets under
        /// <c>TFEATURE_CONTEXT</c> in <c>featureContext</c>. Otherwise it only logs that no
        /// TFeature is available.
        /// </summary>
        private void SetTFeatures()
        {
            var tfeatureFileName = config.GetValueOptional(TFEATURE_FILENAME);
            if (string.IsNullOrEmpty(tfeatureFileName))
            {
                Logger.WriteLine("No TFeature available.");
                return;
            }

            //Load template feature set
            var tfeatureFilePath = GetFilePath(currentDirectory, tfeatureFileName);
            Logger.WriteLine($"Loading template feature set from {tfeatureFilePath}");
            tFeaturizer = new TemplateFeaturizer(tfeatureFilePath);

            // "binary" (case-insensitive) selects BINARY; any other value selects FREQUENCY.
            var weightTypeValue = config.GetValueRequired(TFEATURE_WEIGHT_TYPE);
            tFeatureWeightType = weightTypeValue.Equals("binary", StringComparison.InvariantCultureIgnoreCase)
                ? TFEATURE_WEIGHT_TYPE_ENUM.BINARY
                : TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
            // Log the resolved enum rather than the raw text, so the log shows the value
            // that is actually in effect even when the configured text was not recognised.
            Logger.WriteLine($"TFeature weight type: {tFeatureWeightType}");

            // Context offsets are a comma-separated list of integers. They are parsed with the
            // invariant culture because the configuration file is machine-readable and must be
            // interpreted the same way regardless of the machine's regional settings.
            var tfeatureContext = config.GetValueRequired(TFEATURE_CONTEXT);
            var contextOffsets = new List<int>();
            foreach (var contextOffset in tfeatureContext.Split(','))
            {
                contextOffsets.Add(int.Parse(contextOffset, System.Globalization.CultureInfo.InvariantCulture));
            }

            // Register only after every offset parsed, so a malformed value never leaves a
            // partially filled entry behind.
            featureContext.Add(TFEATURE_CONTEXT, contextOffsets);
            Logger.WriteLine($"TFeature context: {tfeatureContext}");
        }
Example #3
0
        /// <summary>
        /// Reads the pre-trained model settings from the loaded configuration. When the
        /// configured pre-train type equals "AutoEncoder" (case-insensitive) an auto-encoder is
        /// initialized from its own configuration file; for any other value the embedding path
        /// is taken: an optional embedding model (binary or text format) is loaded, and the
        /// feature column and context offsets are read.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// An embedding model file is configured while <c>preTrainedModel</c> is already set.
        /// </exception>
        private void SetPretrainedModel()
        {
            //Load pre-trained model. It supports embedding model and auto-encoder model
            var preTrainTypeValue = config.GetValueRequired(PRETRAIN_TYPE);

            Logger.WriteLine("Pretrain type: {0}", preTrainTypeValue);

            if (preTrainTypeValue.Equals(RNNSharp.PRETRAIN_TYPE.AutoEncoder.ToString(),
                                         StringComparison.InvariantCultureIgnoreCase))
            {
                preTrainType = RNNSharp.PRETRAIN_TYPE.AutoEncoder;
                var autoEncoderConfigFilePath = GetFilePath(currentDirectory,
                                                            config.GetValueRequired(AUTOENCODER_CONFIG));
                Logger.WriteLine($"Loading auto encoder model. Config file = '{autoEncoderConfigFilePath}'");
                autoEncoder = InitializeAutoEncoder(autoEncoderConfigFilePath);
            }
            else
            {
                preTrainType = RNNSharp.PRETRAIN_TYPE.Embedding;

                // Embedding model from the primary file setting (single-argument constructor).
                var preTrainedModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_FILENAME);
                if (!string.IsNullOrEmpty(preTrainedModelFilePath))
                {
                    preTrainedModelFilePath = GetFilePath(currentDirectory, preTrainedModelFilePath);
                    if (preTrainedModel != null)
                    {
                        throw new ArgumentException(
                                  "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                    }
                    Logger.WriteLine($"Loading pretrained embedding model: {preTrainedModelFilePath}");
                    preTrainedModel = new WordEMWrapFeaturizer(preTrainedModelFilePath);
                }

                // Embedding model in text format. Rejected if a model is already loaded,
                // which includes the case where both file settings are present.
                var preTrainedRawModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_RAW_FILENAME);
                if (!string.IsNullOrEmpty(preTrainedRawModelFilePath))
                {
                    preTrainedRawModelFilePath = GetFilePath(currentDirectory, preTrainedRawModelFilePath);
                    if (preTrainedModel != null)
                    {
                        throw new ArgumentException(
                                  "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                    }
                    Logger.WriteLine($"Loading pretrained embedding model {preTrainedRawModelFilePath} in text format");
                    preTrainedModel = new WordEMWrapFeaturizer(preTrainedRawModelFilePath, true);
                }

                // Numeric settings are parsed with the invariant culture: the configuration file
                // is machine-readable and must not depend on the machine's regional settings.
                var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

                preTrainedModelColumn = int.Parse(config.GetValueRequired(PRETRAINEDMODEL_COLUMN), invariantCulture);
                Logger.WriteLine("Pretrained model feature column: {0}", preTrainedModelColumn);

                // Context offsets are a comma-separated list of integers.
                var preTrainedModelContext = config.GetValueRequired(WORDEMBEDDING_CONTEXT);
                var contextOffsets = new List<int>();
                foreach (var contextOffset in preTrainedModelContext.Split(','))
                {
                    contextOffsets.Add(int.Parse(contextOffset, invariantCulture));
                }

                // Register only after every offset parsed, so a malformed value never leaves a
                // partially filled entry behind.
                featureContext.Add(WORDEMBEDDING_CONTEXT, contextOffsets);
                Logger.WriteLine($"Pretrained model context offset : {preTrainedModelContext}");
            }
        }
Example #4
0
        /// <summary>
        /// Loads the configuration file at <paramref name="configFilePath"/> and fills in the
        /// settings derived from it: working directory, model file path, CRF flag, optional
        /// maximum sequence length, network type, layers, pre-trained model and template features.
        /// </summary>
        /// <param name="configFilePath">Path handed to <c>ConfigUtils.LoadFile</c>.</param>
        /// <exception cref="ArgumentException">
        /// The configured network type matches none of Forward, ForwardSeq2Seq, BiDirectional
        /// or BiDirectionalAverage (compared case-insensitively).
        /// </exception>
        public void LoadFeatureConfigFromFile(string configFilePath)
        {
            //Load configuration file
            config = new ConfigUtils();
            config.LoadFile(configFilePath);

            //Get current directory from configuration file, falling back to the process' current directory
            currentDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
            if (string.IsNullOrEmpty(currentDirectory))
            {
                currentDirectory = Environment.CurrentDirectory;
            }
            Logger.WriteLine($"Current directory : {currentDirectory}");

            //Get model file path
            ModelFilePath = GetFilePath(currentDirectory, config.GetValueRequired(MODEL_FILEPATH));
            Logger.WriteLine($"Main model is located at {ModelFilePath}");

            // The Set* helpers further down run against this fresh, empty context map.
            featureContext = new Dictionary<string, List<int>>();

            // CRF training stays off unless the optional setting is present.
            var isCRFTraining = config.GetValueOptional(CRF_LAYER);

            IsCRFTraining = false;
            if (!string.IsNullOrEmpty(isCRFTraining))
            {
                IsCRFTraining = bool.Parse(isCRFTraining);
            }

            // Optional maximum sequence length; MaxSequenceLength is left untouched when unset.
            // Parsed with the invariant culture because the configuration file is machine-readable
            // and must be interpreted the same way regardless of the machine's regional settings.
            var maxSeqLength = config.GetValueOptional(MAX_SEQUENCE_LENGTH);

            if (!string.IsNullOrEmpty(maxSeqLength))
            {
                MaxSequenceLength = int.Parse(maxSeqLength, System.Globalization.CultureInfo.InvariantCulture);
            }

            //Load network type; unlike the optional settings above, an unrecognised value is rejected
            string networkType = config.GetValueRequired(NETWORK_TYPE);

            if (networkType.Equals(NETWORKTYPE.Forward.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                NetworkType = NETWORKTYPE.Forward;
            }
            else if (networkType.Equals(NETWORKTYPE.ForwardSeq2Seq.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                NetworkType = NETWORKTYPE.ForwardSeq2Seq;
            }
            else if (networkType.Equals(NETWORKTYPE.BiDirectional.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                NetworkType = NETWORKTYPE.BiDirectional;
            }
            else if (networkType.Equals(NETWORKTYPE.BiDirectionalAverage.ToString(), StringComparison.InvariantCultureIgnoreCase))
            {
                NetworkType = NETWORKTYPE.BiDirectionalAverage;
            }
            else
            {
                throw new ArgumentException($"Invalidated network type: {networkType}");
            }
            Logger.WriteLine($"Network type: {NetworkType}");

            SetHiddenLayers();
            SetOutputLayers();
            SetPretrainedModel();
            SetTFeatures();

            //Load auto-encoder model for sequence-to-sequence. This model is used to encode source sequence
            if (NetworkType == NETWORKTYPE.ForwardSeq2Seq)
            {
                var seqAutoEncoderConfigFilePath = GetFilePath(currentDirectory,
                                                               config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG));
                Logger.WriteLine(
                    $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{seqAutoEncoderConfigFilePath}'");

                Seq2SeqAutoEncoder = InitializeAutoEncoder(seqAutoEncoderConfigFilePath);
            }

            //Check if settings are validated
            CheckSettings();
        }