/// <summary>
/// Reads the configuration file at <paramref name="configFilePath"/> and initializes the
/// model path, layers, pre-trained model, template features, CRF flag, model type and
/// model direction from it, then validates the result through <c>CheckSettings</c>.
/// </summary>
/// <param name="configFilePath">Path handed to <c>ConfigUtils.LoadFile</c>.</param>
public void LoadFeatureConfigFromFile(string configFilePath)
{
    config = new ConfigUtils();
    config.LoadFile(configFilePath);

    // Base directory passed to GetFilePath for every path setting; the process
    // working directory is used when the setting is missing or empty.
    var configuredDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
    currentDirectory = string.IsNullOrEmpty(configuredDirectory)
        ? Environment.CurrentDirectory
        : configuredDirectory;
    Logger.WriteLine($"Current directory : {currentDirectory}");

    // Location of the main model file.
    var modelFileSetting = config.GetValueRequired(MODEL_FILEPATH);
    ModelFilePath = GetFilePath(currentDirectory, modelFileSetting);
    Logger.WriteLine($"Main model is located at {ModelFilePath}");

    // The Set* helpers below fill this map, so it has to exist before they run.
    featureContext = new Dictionary<string, List<int>>();

    SetHiddenLayers();
    SetOutputLayers();
    SetPretrainedModel();
    SetTFeatures();

    // CRF training stays disabled unless the optional setting carries a value.
    var crfLayerSetting = config.GetValueOptional(CRF_LAYER);
    IsCRFTraining = false;
    if (!string.IsNullOrEmpty(crfLayerSetting))
    {
        IsCRFTraining = bool.Parse(crfLayerSetting);
    }

    // Model type: every value other than "SeqLabel" (case-insensitive) selects Seq2Seq.
    var modelTypeSetting = config.GetValueRequired(MODEL_TYPE);
    if (modelTypeSetting.Equals(MODELTYPE.SeqLabel.ToString(), StringComparison.InvariantCultureIgnoreCase))
    {
        ModelType = MODELTYPE.SeqLabel;
    }
    else
    {
        ModelType = MODELTYPE.Seq2Seq;
    }
    Logger.WriteLine($"Model type: {ModelType}");

    // Model direction: every value other than "Forward" (case-insensitive) selects BiDirectional.
    var modelDirectionSetting = config.GetValueRequired(MODEL_DIRECTION);
    if (modelDirectionSetting.Equals(MODELDIRECTION.Forward.ToString(), StringComparison.InvariantCultureIgnoreCase))
    {
        ModelDirection = MODELDIRECTION.Forward;
    }
    else
    {
        ModelDirection = MODELDIRECTION.BiDirectional;
    }
    Logger.WriteLine($"Model direction: {ModelDirection}");

    // Sequence-to-sequence models additionally need an auto-encoder for the source sequence.
    if (ModelType == MODELTYPE.Seq2Seq)
    {
        var autoEncoderSetting = config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG);
        var seqAutoEncoderConfigFilePath = GetFilePath(currentDirectory, autoEncoderSetting);
        Logger.WriteLine(
            $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{seqAutoEncoderConfigFilePath}'");
        Seq2SeqAutoEncoder = InitializeAutoEncoder(seqAutoEncoderConfigFilePath);
    }

    // Final consistency check of everything loaded above.
    CheckSettings();
}
/// <summary>
/// Loads the optional template feature set named by the TFEATURE_FILENAME setting, together
/// with its weight type and context offsets.
/// </summary>
/// <remarks>
/// When TFEATURE_FILENAME is missing or empty nothing is loaded and only a log line is written.
/// Any weight type other than "binary" (compared case-insensitively) selects FREQUENCY.
/// The context setting is a comma separated list of integer offsets; a malformed entry makes
/// <c>int.Parse</c> throw.
/// </remarks>
private void SetTFeatures()
{
    var tfeatureFileName = config.GetValueOptional(TFEATURE_FILENAME);
    if (string.IsNullOrEmpty(tfeatureFileName))
    {
        Logger.WriteLine("No TFeature available.");
        return;
    }

    //Load template feature set
    var tfeatureFilePath = GetFilePath(currentDirectory, tfeatureFileName);
    Logger.WriteLine($"Loading template feature set from {tfeatureFilePath}");
    tFeaturizer = new TemplateFeaturizer(tfeatureFilePath);

    var tfeatureWeightType = config.GetValueRequired(TFEATURE_WEIGHT_TYPE);
    tFeatureWeightType = tfeatureWeightType.Equals("binary", StringComparison.InvariantCultureIgnoreCase)
        ? TFEATURE_WEIGHT_TYPE_ENUM.BINARY
        : TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
    // Log the resolved value rather than the raw setting, so an unrecognized setting that
    // silently fell back to FREQUENCY is visible in the log.
    Logger.WriteLine($"TFeature weight type: {tFeatureWeightType}");

    var tfeatureContext = config.GetValueRequired(TFEATURE_CONTEXT);

    // Configuration values are machine-readable: parse with the invariant culture so that
    // negative offsets such as "-1" do not depend on the culture of the current thread.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    // Parse everything first and register the list afterwards, so a malformed entry does not
    // leave a half-filled list behind in featureContext.
    var contextOffsets = new List<int>();
    foreach (var contextOffset in tfeatureContext.Split(','))
    {
        contextOffsets.Add(int.Parse(contextOffset, invariantCulture));
    }
    featureContext.Add(TFEATURE_CONTEXT, contextOffsets);

    Logger.WriteLine($"TFeature context: {tfeatureContext}");
}
/// <summary>
/// Loads the pre-trained model selected by the PRETRAIN_TYPE setting: either an auto-encoder
/// model or a word-embedding model.
/// </summary>
/// <remarks>
/// Any PRETRAIN_TYPE value other than the AutoEncoder enum name (compared case-insensitively)
/// is treated as Embedding. In the embedding case the model can come from
/// PRETRAINEDMODEL_FILENAME or from PRETRAINEDMODEL_RAW_FILENAME (text format); the feature
/// column and the comma separated context offsets are required settings.
/// </remarks>
/// <exception cref="ArgumentException">
/// An embedding file is configured while <c>preTrainedModel</c> is already set, e.g. because
/// both file settings are present.
/// </exception>
private void SetPretrainedModel()
{
    //Load pre-trained model. It supports embedding model and auto-encoder model
    var preTrainTypeValue = config.GetValueRequired(PRETRAIN_TYPE);
    Logger.WriteLine("Pretrain type: {0}", preTrainTypeValue);

    if (preTrainTypeValue.Equals(RNNSharp.PRETRAIN_TYPE.AutoEncoder.ToString(),
        StringComparison.InvariantCultureIgnoreCase))
    {
        preTrainType = RNNSharp.PRETRAIN_TYPE.AutoEncoder;
        var autoEncoderConfigFilePath = GetFilePath(currentDirectory, config.GetValueRequired(AUTOENCODER_CONFIG));
        Logger.WriteLine($"Loading auto encoder model. Config file = '{autoEncoderConfigFilePath}'");
        autoEncoder = InitializeAutoEncoder(autoEncoderConfigFilePath);
        return;
    }

    preTrainType = RNNSharp.PRETRAIN_TYPE.Embedding;

    // Embedding model in the default format.
    var preTrainedModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_FILENAME);
    if (!string.IsNullOrEmpty(preTrainedModelFilePath))
    {
        preTrainedModelFilePath = GetFilePath(currentDirectory, preTrainedModelFilePath);
        if (preTrainedModel != null)
        {
            throw new ArgumentException(
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
        }
        Logger.WriteLine($"Loading pretrained embedding model: {preTrainedModelFilePath}");
        preTrainedModel = new WordEMWrapFeaturizer(preTrainedModelFilePath);
    }

    // Embedding model in text format. The null check also rejects a configuration that
    // names both the default-format and the text-format file.
    var preTrainedRawModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_RAW_FILENAME);
    if (!string.IsNullOrEmpty(preTrainedRawModelFilePath))
    {
        preTrainedRawModelFilePath = GetFilePath(currentDirectory, preTrainedRawModelFilePath);
        if (preTrainedModel != null)
        {
            throw new ArgumentException(
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
        }
        Logger.WriteLine($"Loading pretrained embedding model {preTrainedRawModelFilePath} in text format");
        preTrainedModel = new WordEMWrapFeaturizer(preTrainedRawModelFilePath, true);
    }

    // Configuration values are machine-readable: parse with the invariant culture so that
    // negative offsets such as "-1" do not depend on the culture of the current thread.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    preTrainedModelColumn = int.Parse(config.GetValueRequired(PRETRAINEDMODEL_COLUMN), invariantCulture);
    Logger.WriteLine("Pretrained model feature column: {0}", preTrainedModelColumn);

    var preTrainedModelContext = config.GetValueRequired(WORDEMBEDDING_CONTEXT);

    // Parse everything first and register the list afterwards, so a malformed entry does not
    // leave a half-filled list behind in featureContext.
    var contextOffsets = new List<int>();
    foreach (var contextOffset in preTrainedModelContext.Split(','))
    {
        contextOffsets.Add(int.Parse(contextOffset, invariantCulture));
    }
    featureContext.Add(WORDEMBEDDING_CONTEXT, contextOffsets);

    Logger.WriteLine($"Pretrained model context offset : {preTrainedModelContext}");
}
/// <summary>
/// Reads the configuration file at <paramref name="configFilePath"/> and initializes the
/// model path, CRF flag, maximum sequence length, network type, layers, pre-trained model
/// and template features from it, then validates the result through <c>CheckSettings</c>.
/// </summary>
/// <param name="configFilePath">Path handed to <c>ConfigUtils.LoadFile</c>.</param>
/// <exception cref="ArgumentException">
/// The NETWORK_TYPE setting does not name one of the supported network types.
/// </exception>
public void LoadFeatureConfigFromFile(string configFilePath)
{
    config = new ConfigUtils();
    config.LoadFile(configFilePath);

    // Base directory passed to GetFilePath for every path setting; the process
    // working directory is used when the setting is missing or empty.
    var configuredDirectory = config.GetValueOptional(CURRENT_DIRECTORY);
    currentDirectory = string.IsNullOrEmpty(configuredDirectory)
        ? Environment.CurrentDirectory
        : configuredDirectory;
    Logger.WriteLine($"Current directory : {currentDirectory}");

    // Location of the main model file.
    var modelFileSetting = config.GetValueRequired(MODEL_FILEPATH);
    ModelFilePath = GetFilePath(currentDirectory, modelFileSetting);
    Logger.WriteLine($"Main model is located at {ModelFilePath}");

    // The Set* helpers further down fill this map, so it has to exist before they run.
    featureContext = new Dictionary<string, List<int>>();

    // CRF training stays disabled unless the optional setting carries a value.
    var crfLayerSetting = config.GetValueOptional(CRF_LAYER);
    IsCRFTraining = false;
    if (!string.IsNullOrEmpty(crfLayerSetting))
    {
        IsCRFTraining = bool.Parse(crfLayerSetting);
    }

    // MaxSequenceLength keeps its current value when the setting is absent.
    var maxSequenceLengthSetting = config.GetValueOptional(MAX_SEQUENCE_LENGTH);
    if (!string.IsNullOrEmpty(maxSequenceLengthSetting))
    {
        MaxSequenceLength = int.Parse(maxSequenceLengthSetting);
    }

    // Network type: the setting must match one of these names, ignoring case.
    string networkTypeSetting = config.GetValueRequired(NETWORK_TYPE);
    NETWORKTYPE[] supportedNetworkTypes =
    {
        NETWORKTYPE.Forward,
        NETWORKTYPE.ForwardSeq2Seq,
        NETWORKTYPE.BiDirectional,
        NETWORKTYPE.BiDirectionalAverage
    };

    bool networkTypeRecognized = false;
    foreach (NETWORKTYPE candidate in supportedNetworkTypes)
    {
        if (networkTypeSetting.Equals(candidate.ToString(), StringComparison.InvariantCultureIgnoreCase))
        {
            NetworkType = candidate;
            networkTypeRecognized = true;
            break;
        }
    }

    if (!networkTypeRecognized)
    {
        throw new ArgumentException($"Invalidated network type: {networkTypeSetting}");
    }
    Logger.WriteLine($"Network type: {NetworkType}");

    SetHiddenLayers();
    SetOutputLayers();
    SetPretrainedModel();
    SetTFeatures();

    // Sequence-to-sequence networks additionally need an auto-encoder for the source sequence.
    if (NetworkType == NETWORKTYPE.ForwardSeq2Seq)
    {
        var autoEncoderSetting = config.GetValueRequired(SEQ2SEQ_AUTOENCODER_CONFIG);
        var seqAutoEncoderConfigFilePath = GetFilePath(currentDirectory, autoEncoderSetting);
        Logger.WriteLine(
            $"Loading auto encoder model for sequnce-to-sequence task. Config file = '{seqAutoEncoderConfigFilePath}'");
        Seq2SeqAutoEncoder = InitializeAutoEncoder(seqAutoEncoderConfigFilePath);
    }

    // Final consistency check of everything loaded above.
    CheckSettings();
}