/// <summary>
/// Loads the pre-trained model described by the configuration. Two kinds are
/// supported: an auto-encoder model and a pretrained embedding model.
/// </summary>
/// <remarks>
/// The required pretrain-type setting selects the branch: a value equal
/// (ignoring case) to the name of <c>RNNSharp.PRETRAIN_TYPE.AutoEncoder</c>
/// selects the auto-encoder; any other value selects the embedding model.
/// For the embedding branch, the binary and raw-text model file settings are
/// both optional, but at most one model may end up loaded. The feature column
/// and the comma-separated context offsets are required.
/// </remarks>
/// <exception cref="ArgumentException">
/// Thrown when an embedding model file is configured while
/// <c>preTrainedModel</c> is already set.
/// </exception>
/// <exception cref="FormatException">
/// Thrown when the feature column or a context offset is not an integer.
/// </exception>
private void SetPretrainedModel()
{
    var preTrainTypeValue = config.GetValueRequired(PRETRAIN_TYPE);
    Logger.WriteLine("Pretrain type: {0}", preTrainTypeValue);

    var isAutoEncoder = preTrainTypeValue.Equals(
        RNNSharp.PRETRAIN_TYPE.AutoEncoder.ToString(),
        StringComparison.InvariantCultureIgnoreCase);

    if (isAutoEncoder)
    {
        preTrainType = RNNSharp.PRETRAIN_TYPE.AutoEncoder;

        var autoEncoderConfigFilePath = GetFilePath(currentDirectory, config.GetValueRequired(AUTOENCODER_CONFIG));
        Logger.WriteLine($"Loading auto encoder model. Config file = '{autoEncoderConfigFilePath}'");
        autoEncoder = InitializeAutoEncoder(autoEncoderConfigFilePath);
        return;
    }

    // Every value other than the auto-encoder name falls back to the embedding model.
    preTrainType = RNNSharp.PRETRAIN_TYPE.Embedding;

    // Embedding model stored in the default (non-text) format.
    var preTrainedModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_FILENAME);
    if (!string.IsNullOrEmpty(preTrainedModelFilePath))
    {
        preTrainedModelFilePath = GetFilePath(currentDirectory, preTrainedModelFilePath);
        if (preTrainedModel != null)
        {
            throw new ArgumentException(
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
        }

        Logger.WriteLine($"Loading pretrained embedding model: {preTrainedModelFilePath}");
        preTrainedModel = new WordEMWrapFeaturizer(preTrainedModelFilePath);
    }

    // Embedding model stored in text format. Configuring it together with the
    // model above trips the "already loaded" check below.
    var preTrainedRawModelFilePath = config.GetValueOptional(PRETRAINEDMODEL_RAW_FILENAME);
    if (!string.IsNullOrEmpty(preTrainedRawModelFilePath))
    {
        preTrainedRawModelFilePath = GetFilePath(currentDirectory, preTrainedRawModelFilePath);
        if (preTrainedModel != null)
        {
            throw new ArgumentException(
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
        }

        Logger.WriteLine($"Loading pretrained embedding model {preTrainedRawModelFilePath} in text format");
        preTrainedModel = new WordEMWrapFeaturizer(preTrainedRawModelFilePath, true);
    }

    // Configuration values are machine-readable, so parse them with the invariant
    // culture: offsets such as "-1" must not depend on the current culture's
    // negative-sign symbol.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    preTrainedModelColumn = int.Parse(config.GetValueRequired(PRETRAINEDMODEL_COLUMN), invariantCulture);
    Logger.WriteLine("Pretrained model feature column: {0}", preTrainedModelColumn);

    var preTrainedModelContext = config.GetValueRequired(WORDEMBEDDING_CONTEXT);

    // Register the (initially empty) offset list first, then fill it in place.
    var contextOffsets = new List<int>();
    featureContext.Add(WORDEMBEDDING_CONTEXT, contextOffsets);
    foreach (var contextOffset in preTrainedModelContext.Split(','))
    {
        contextOffsets.Add(int.Parse(contextOffset, invariantCulture));
    }

    Logger.WriteLine($"Pretrained model context offset : {preTrainedModelContext}");
}
/// <summary>
/// Loads the feature configuration from a text file made of "key: value" lines.
/// </summary>
/// <remarks>
/// Blank lines and lines starting with '#' are skipped. The keys for the
/// embedding model (binary or text format), the template feature set, the
/// embedding column and the template-feature weight type are handled
/// specially. Any other key is treated as a feature name whose value is a
/// comma-separated list of integers appended to <c>m_FeatureConfiguration</c>.
/// Only the first ':' separates key from value, and the value keeps its
/// original letter case, so file paths are passed on unmodified.
/// </remarks>
/// <param name="strFileName">Path of the configuration file to read.</param>
/// <exception cref="ArgumentException">
/// Thrown when a line has no ':' separator, or when a second embedding model
/// is configured.
/// </exception>
/// <exception cref="FormatException">
/// Thrown when a value that must be an integer cannot be parsed.
/// </exception>
public void LoadFeatureConfigFromFile(string strFileName)
{
    // The using block closes the reader even when a malformed line throws
    // part-way through the file.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        m_FeatureConfiguration = new Dictionary<string, List<int>>();

        // Configuration values are machine-readable; parse integers independently
        // of the current culture.
        var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

        string strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            strLine = strLine.Trim();
            if (strLine.Length == 0 || strLine[0] == '#')
            {
                // Empty line or comment line, ignore it
                continue;
            }

            // Split on the first ':' only, so values that contain ':' themselves
            // (for example "C:\models\embedding.bin") stay intact.
            int idxSeparator = strLine.IndexOf(':');
            if (idxSeparator < 0)
            {
                throw new ArgumentException(
                    string.Format("Invalid configuration line '{0}'. Expected format is 'key: value'.", strLine));
            }

            string strKey = strLine.Substring(0, idxSeparator).Trim();
            string strValue = strLine.Substring(idxSeparator + 1).Trim();

            if (strKey == WORDEMBEDDING_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_RAW_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
            }
            else if (strKey == TFEATURE_FILENAME)
            {
                Logger.WriteLine("Loading template feature set...");
                m_TFeaturizer = new TemplateFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_COLUMN)
            {
                m_WordEmbeddingCloumn = int.Parse(strValue, invariantCulture);
                Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
            }
            else if (strKey == TFEATURE_WEIGHT_TYPE)
            {
                Logger.WriteLine("TFeature weighting type: {0}", strValue);

                // "binary" in any letter case selects BINARY; every other value
                // selects FREQUENCY.
                if (string.Equals(strValue, "binary", StringComparison.OrdinalIgnoreCase))
                {
                    m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                }
                else
                {
                    m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                }
            }
            else
            {
                // Generic feature entry: a comma-separated list of integers.
                // Repeated keys append to the list that is already registered.
                List<int> featureValues;
                if (!m_FeatureConfiguration.TryGetValue(strKey, out featureValues))
                {
                    featureValues = new List<int>();
                    m_FeatureConfiguration.Add(strKey, featureValues);
                }

                foreach (string value in strValue.Split(','))
                {
                    featureValues.Add(int.Parse(value, invariantCulture));
                }
            }
        }
    }
}
/// <summary>
/// Loads the feature configuration from a text file made of "key: value" lines.
/// </summary>
/// <remarks>
/// Blank lines and lines starting with '#' are skipped. Only the first ':'
/// separates key from value. The keys for the pretrained model (binary or
/// text format), the template feature set, the pretrained model column, the
/// template-feature weight type, the pretrain type and the auto-encoder
/// files are handled specially. Any other key is treated as a feature name
/// whose value is a comma-separated list of integers appended to
/// <c>FeatureContext</c>.
/// </remarks>
/// <param name="strFileName">Path of the configuration file to read.</param>
/// <exception cref="ArgumentException">
/// Thrown when a line has no ':' separator, or when a second pretrained model
/// is configured.
/// </exception>
/// <exception cref="FormatException">
/// Thrown when a value that must be an integer cannot be parsed.
/// </exception>
public void LoadFeatureConfigFromFile(string strFileName)
{
    // The using block closes the reader even when a malformed line throws
    // part-way through the file.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        FeatureContext = new Dictionary<string, List<int>>();

        // Configuration values are machine-readable; parse integers independently
        // of the current culture.
        var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

        string strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            strLine = strLine.Trim();
            if (strLine.Length == 0 || strLine[0] == '#')
            {
                // Empty line or comment line, ignore it
                continue;
            }

            // Report a missing separator explicitly instead of letting
            // Substring fail with an unrelated out-of-range error.
            int idxSeparator = strLine.IndexOf(':');
            if (idxSeparator < 0)
            {
                throw new ArgumentException(
                    string.Format("Invalid configuration line '{0}'. Expected format is 'key: value'.", strLine));
            }

            string strKey = strLine.Substring(0, idxSeparator).Trim();
            string strValue = strLine.Substring(idxSeparator + 1).Trim();

            if (strKey == PRETRAINEDMODEL_FILENAME)
            {
                if (PretainedModel != null)
                {
                    throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading pretrained dense feature set from model {0}", strValue);
                PretainedModel = new WordEMWrapFeaturizer(strValue);
            }
            else if (strKey == PRETRAINEDMODEL_RAW_FILENAME)
            {
                if (PretainedModel != null)
                {
                    throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading pretrained dense feature set from model {0} in text format", strValue);
                PretainedModel = new WordEMWrapFeaturizer(strValue, true);
            }
            else if (strKey == TFEATURE_FILENAME)
            {
                Logger.WriteLine("Loading template feature set...");
                TFeaturizer = new TemplateFeaturizer(strValue);
            }
            else if (strKey == PRETRAINEDMODEL_COLUMN)
            {
                PretrainedModelColumn = int.Parse(strValue, invariantCulture);
                Logger.WriteLine("Pretrained model feature column: {0}", PretrainedModelColumn);
            }
            else if (strKey == TFEATURE_WEIGHT_TYPE)
            {
                Logger.WriteLine("TFeature weighting type: {0}", strValue);

                // Exact (case-sensitive) match on "binary"; every other value
                // selects FREQUENCY.
                if (strValue == "binary")
                {
                    TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                }
                else
                {
                    TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                }
            }
            else if (strKey == PRETRAIN_TYPE)
            {
                // The auto-encoder name is matched ignoring case; every other
                // value selects EMBEDDING.
                if (strValue.Equals(RNNSharp.PRETRAIN_TYPE.AUTOENCODER.ToString(), StringComparison.InvariantCultureIgnoreCase))
                {
                    preTrainType = RNNSharp.PRETRAIN_TYPE.AUTOENCODER;
                }
                else
                {
                    preTrainType = RNNSharp.PRETRAIN_TYPE.EMBEDDING;
                }

                Logger.WriteLine("Pretrain type: {0}", preTrainType);
            }
            else if (strKey == AUTOENCODER_FEATURECONFIG)
            {
                autoEncoderFeatureConfigFile = strValue;
                Logger.WriteLine("Auto encoder configuration file: {0}", autoEncoderFeatureConfigFile);
            }
            else if (strKey == AUTOENCODER_MODEL)
            {
                autoEncoderModelFile = strValue;
                Logger.WriteLine("Auto encoder model file: {0}", autoEncoderModelFile);
            }
            else
            {
                // Generic feature entry: a comma-separated list of integers.
                // Repeated keys append to the list that is already registered.
                List<int> contextValues;
                if (!FeatureContext.TryGetValue(strKey, out contextValues))
                {
                    contextValues = new List<int>();
                    FeatureContext.Add(strKey, contextValues);
                }

                foreach (string value in strValue.Split(','))
                {
                    contextValues.Add(int.Parse(value, invariantCulture));
                }
            }
        }
    }
}
/// <summary>
/// Reads a feature configuration file in which every meaningful line has the
/// form "key: value".
/// </summary>
/// <remarks>
/// Empty lines and lines whose first character is '#' are ignored. Dedicated
/// keys configure the embedding model (binary or text format), the template
/// feature set, the embedding column and the template-feature weight type.
/// Every other key names a feature whose value is a comma-separated list of
/// integers appended to <c>m_FeatureConfiguration</c>. The key/value split
/// happens at the first ':' only, and the value keeps its original letter
/// case, so file paths are passed on unmodified.
/// </remarks>
/// <param name="strFileName">Path of the configuration file to read.</param>
/// <exception cref="ArgumentException">
/// Thrown when a line has no ':' separator, or when a second embedding model
/// is configured.
/// </exception>
/// <exception cref="FormatException">
/// Thrown when a value that must be an integer cannot be parsed.
/// </exception>
public void LoadFeatureConfigFromFile(string strFileName)
{
    // Dispose the reader on every exit path, including exceptions raised by a
    // malformed line.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        m_FeatureConfiguration = new Dictionary<string, List<int>>();

        // Integers in the file are machine-readable, so they are parsed with the
        // invariant culture rather than the current one.
        var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

        string strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            strLine = strLine.Trim();
            if (strLine.Length == 0 || strLine[0] == '#')
            {
                // Empty line or comment line, ignore it
                continue;
            }

            // Use the first ':' as the separator so that a value containing ':'
            // (for example a Windows path with a drive letter) is not truncated.
            int idxSeparator = strLine.IndexOf(':');
            if (idxSeparator < 0)
            {
                throw new ArgumentException(
                    string.Format("Invalid configuration line '{0}'. Expected format is 'key: value'.", strLine));
            }

            string strKey = strLine.Substring(0, idxSeparator).Trim();
            string strValue = strLine.Substring(idxSeparator + 1).Trim();

            if (strKey == WORDEMBEDDING_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_RAW_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
            }
            else if (strKey == TFEATURE_FILENAME)
            {
                Logger.WriteLine("Loading template feature set...");
                m_TFeaturizer = new TemplateFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_COLUMN)
            {
                m_WordEmbeddingCloumn = int.Parse(strValue, invariantCulture);
                Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
            }
            else if (strKey == TFEATURE_WEIGHT_TYPE)
            {
                Logger.WriteLine("TFeature weighting type: {0}", strValue);

                // "binary" in any letter case selects BINARY; every other value
                // selects FREQUENCY.
                if (string.Equals(strValue, "binary", StringComparison.OrdinalIgnoreCase))
                {
                    m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                }
                else
                {
                    m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                }
            }
            else
            {
                // Generic feature entry: a comma-separated list of integers.
                // A key that appears more than once keeps appending to its list.
                List<int> featureValues;
                if (!m_FeatureConfiguration.TryGetValue(strKey, out featureValues))
                {
                    featureValues = new List<int>();
                    m_FeatureConfiguration.Add(strKey, featureValues);
                }

                foreach (string value in strValue.Split(','))
                {
                    featureValues.Add(int.Parse(value, invariantCulture));
                }
            }
        }
    }
}