/// <summary>
/// Configures the template-feature settings from <c>config</c>.
/// </summary>
/// <remarks>
/// When the optional TFEATURE_FILENAME setting is missing or empty, only a log line is
/// written. Otherwise the template featurizer is created from the resolved file path, the
/// weight type is read (the value "binary", in any letter case, selects BINARY; every
/// other value selects FREQUENCY), and the comma-separated context offsets are parsed
/// into <c>featureContext[TFEATURE_CONTEXT]</c>.
/// </remarks>
private void SetTFeatures()
{
    string templateFileName = config.GetValueOptional(TFEATURE_FILENAME);
    if (String.IsNullOrEmpty(tfeatureFileNameOrNull(templateFileName)))
    {
        Logger.WriteLine($"No TFeature available.");
        return;
    }

    // Load the template feature set.
    var templateFilePath = GetFilePath(currentDirectory, templateFileName);
    Logger.WriteLine($"Loading template feature set from {templateFilePath}");
    tFeaturizer = new TemplateFeaturizer(templateFilePath);

    // Weight type: only "binary" is recognised explicitly, anything else means frequency.
    var weightTypeName = config.GetValueRequired(TFEATURE_WEIGHT_TYPE);
    if (weightTypeName.Equals("binary", StringComparison.InvariantCultureIgnoreCase))
    {
        tFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
    }
    else
    {
        tFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
    }
    Logger.WriteLine($"TFeature weight type: {weightTypeName}");

    // The list is registered before parsing starts, so the entry exists even if a later
    // offset fails to parse (same ordering as before).
    var contextSetting = config.GetValueRequired(TFEATURE_CONTEXT);
    var offsets = new List<int>();
    featureContext.Add(TFEATURE_CONTEXT, offsets);
    foreach (var offsetText in contextSetting.Split(','))
    {
        offsets.Add(int.Parse(offsetText));
    }
    Logger.WriteLine($"TFeature context: {contextSetting}");

    // Identity helper kept local so the guard above reads on a single line.
    string tfeatureFileNameOrNull(string value)
    {
        return value;
    }
}
/// <summary>
/// Loads the feature configuration from a text file made of "key: value" lines.
/// </summary>
/// <remarks>
/// Blank lines and lines starting with '#' are skipped. The keys WORDEMBEDDING_FILENAME,
/// WORDEMBEDDING_RAW_FILENAME, TFEATURE_FILENAME, WORDEMBEDDING_COLUMN and
/// TFEATURE_WEIGHT_TYPE are handled specially. Any other key is treated as a feature name
/// whose value is a comma-separated list of integers; these are accumulated into
/// m_FeatureConfiguration, which is reset at the start of every call.
/// </remarks>
/// <param name="strFileName">Path of the configuration file to read.</param>
/// <exception cref="ArgumentException">
/// A line has no ':' separator, or an embedding model has already been loaded.
/// </exception>
/// <exception cref="FormatException">A numeric value is not a valid integer.</exception>
public void LoadFeatureConfigFromFile(string strFileName)
{
    // The using block closes the reader even when a malformed line throws.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        m_FeatureConfiguration = new Dictionary<string, List<int>>();

        string strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            strLine = strLine.Trim();
            if (strLine.Length == 0 || strLine.StartsWith("#", StringComparison.Ordinal))
            {
                // Empty line or comment line, ignore it
                continue;
            }

            // Split on the first ':' only, so values that contain ':' themselves
            // (for example "C:\models\embedding.bin") are kept intact.
            int idxSeparator = strLine.IndexOf(':');
            if (idxSeparator < 0)
            {
                throw new ArgumentException("Invalid line in configuration file, expected \"key: value\": " + strLine);
            }

            string strKey = strLine.Substring(0, idxSeparator).Trim();
            // The value keeps its original casing: lower-casing it would corrupt file
            // names on case-sensitive file systems.
            string strValue = strLine.Substring(idxSeparator + 1).Trim();

            if (strKey == WORDEMBEDDING_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_RAW_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
            }
            else if (strKey == TFEATURE_FILENAME)
            {
                Logger.WriteLine("Loading template feature set...");
                m_TFeaturizer = new TemplateFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_COLUMN)
            {
                m_WordEmbeddingCloumn = int.Parse(strValue);
                Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
            }
            else if (strKey == TFEATURE_WEIGHT_TYPE)
            {
                Logger.WriteLine("TFeature weighting type: {0}", strValue);
                // Ordinal, case-insensitive match: culture-sensitive lower-casing would
                // miss "BINARY" under cultures with special casing rules for 'I'.
                if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                {
                    m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                }
                else
                {
                    m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                }
            }
            else
            {
                // Generic feature entry: comma-separated integers. A key that occurs on
                // several lines accumulates all of its values in one list.
                List<int> featureValues;
                if (m_FeatureConfiguration.TryGetValue(strKey, out featureValues) == false)
                {
                    featureValues = new List<int>();
                    m_FeatureConfiguration.Add(strKey, featureValues);
                }

                foreach (string value in strValue.Split(','))
                {
                    featureValues.Add(int.Parse(value));
                }
            }
        }
    }
}
/// <summary>
/// Loads the feature configuration from a text file made of "key: value" lines.
/// </summary>
/// <remarks>
/// Blank lines and lines starting with '#' are skipped. The keys PRETRAINEDMODEL_FILENAME,
/// PRETRAINEDMODEL_RAW_FILENAME, TFEATURE_FILENAME, PRETRAINEDMODEL_COLUMN,
/// TFEATURE_WEIGHT_TYPE, PRETRAIN_TYPE, AUTOENCODER_FEATURECONFIG and AUTOENCODER_MODEL are
/// handled specially. Any other key is treated as a feature name whose value is a
/// comma-separated list of integers; these are accumulated into FeatureContext, which is
/// reset at the start of every call.
/// </remarks>
/// <param name="strFileName">Path of the configuration file to read.</param>
/// <exception cref="ArgumentException">
/// A line has no ':' separator, or a pretrained model has already been loaded.
/// </exception>
/// <exception cref="FormatException">A numeric value is not a valid integer.</exception>
public void LoadFeatureConfigFromFile(string strFileName)
{
    // The using block closes the reader even when a malformed line throws.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        FeatureContext = new Dictionary<string, List<int>>();

        string strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            strLine = strLine.Trim();
            if (strLine.Length == 0 || strLine.StartsWith("#", StringComparison.Ordinal))
            {
                // Empty line or comment line, ignore it
                continue;
            }

            // Only the first ':' separates key from value, so values may contain ':'.
            int idxSeparator = strLine.IndexOf(':');
            if (idxSeparator < 0)
            {
                // Without this check Substring(0, -1) fails with an error that does not
                // mention the offending line.
                throw new ArgumentException("Invalid line in configuration file, expected \"key: value\": " + strLine);
            }

            string strKey = strLine.Substring(0, idxSeparator).Trim();
            string strValue = strLine.Substring(idxSeparator + 1).Trim();

            if (strKey == PRETRAINEDMODEL_FILENAME)
            {
                if (PretainedModel != null)
                {
                    throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading pretrained dense feature set from model {0}", strValue);
                PretainedModel = new WordEMWrapFeaturizer(strValue);
            }
            else if (strKey == PRETRAINEDMODEL_RAW_FILENAME)
            {
                if (PretainedModel != null)
                {
                    throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading pretrained dense feature set from model {0} in text format", strValue);
                PretainedModel = new WordEMWrapFeaturizer(strValue, true);
            }
            else if (strKey == TFEATURE_FILENAME)
            {
                Logger.WriteLine("Loading template feature set...");
                TFeaturizer = new TemplateFeaturizer(strValue);
            }
            else if (strKey == PRETRAINEDMODEL_COLUMN)
            {
                PretrainedModelColumn = int.Parse(strValue);
                Logger.WriteLine("Pretrained model feature column: {0}", PretrainedModelColumn);
            }
            else if (strKey == TFEATURE_WEIGHT_TYPE)
            {
                Logger.WriteLine("TFeature weighting type: {0}", strValue);
                // Matched without regard to letter case, like the PRETRAIN_TYPE value
                // below, so "Binary" is not silently treated as frequency weighting.
                if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                {
                    TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                }
                else
                {
                    TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                }
            }
            else if (strKey == PRETRAIN_TYPE)
            {
                // Anything other than the AUTOENCODER name falls back to EMBEDDING.
                if (strValue.Equals(RNNSharp.PRETRAIN_TYPE.AUTOENCODER.ToString(), StringComparison.InvariantCultureIgnoreCase))
                {
                    preTrainType = RNNSharp.PRETRAIN_TYPE.AUTOENCODER;
                }
                else
                {
                    preTrainType = RNNSharp.PRETRAIN_TYPE.EMBEDDING;
                }

                Logger.WriteLine("Pretrain type: {0}", preTrainType);
            }
            else if (strKey == AUTOENCODER_FEATURECONFIG)
            {
                autoEncoderFeatureConfigFile = strValue;
                Logger.WriteLine("Auto encoder configuration file: {0}", autoEncoderFeatureConfigFile);
            }
            else if (strKey == AUTOENCODER_MODEL)
            {
                autoEncoderModelFile = strValue;
                Logger.WriteLine("Auto encoder model file: {0}", autoEncoderModelFile);
            }
            else
            {
                // Generic feature entry: comma-separated integers. A key that occurs on
                // several lines accumulates all of its values in one list.
                List<int> featureValues;
                if (FeatureContext.TryGetValue(strKey, out featureValues) == false)
                {
                    featureValues = new List<int>();
                    FeatureContext.Add(strKey, featureValues);
                }

                foreach (string value in strValue.Split(','))
                {
                    featureValues.Add(int.Parse(value));
                }
            }
        }
    }
}
/// <summary>
/// Reads a "key: value" feature configuration file and applies each setting.
/// </summary>
/// <remarks>
/// Lines that are empty or begin with '#' are ignored. WORDEMBEDDING_FILENAME and
/// WORDEMBEDDING_RAW_FILENAME load the embedding featurizer, TFEATURE_FILENAME loads the
/// template featurizer, WORDEMBEDDING_COLUMN and TFEATURE_WEIGHT_TYPE set their
/// respective fields. Every other key names a feature whose comma-separated integer
/// values are appended to its list in m_FeatureConfiguration; that dictionary is
/// recreated on each call.
/// </remarks>
/// <param name="strFileName">Path of the configuration file to read.</param>
/// <exception cref="ArgumentException">
/// A line has no ':' separator, or an embedding model has already been loaded.
/// </exception>
/// <exception cref="FormatException">A numeric value is not a valid integer.</exception>
public void LoadFeatureConfigFromFile(string strFileName)
{
    // Dispose the reader on every exit path, including exceptions from bad lines.
    using (StreamReader sr = new StreamReader(strFileName))
    {
        m_FeatureConfiguration = new Dictionary<string, List<int>>();

        string strLine;
        while ((strLine = sr.ReadLine()) != null)
        {
            strLine = strLine.Trim();

            bool isBlank = strLine.Length == 0;
            bool isComment = strLine.StartsWith("#", StringComparison.Ordinal);
            if (isBlank || isComment)
            {
                continue;
            }

            // Key and value are separated by the FIRST ':' only; later ':' characters
            // (for example the drive letter in "C:\models\embedding.bin") belong to the value.
            int idxSeparator = strLine.IndexOf(':');
            if (idxSeparator < 0)
            {
                throw new ArgumentException("Invalid line in configuration file, expected \"key: value\": " + strLine);
            }

            string strKey = strLine.Substring(0, idxSeparator).Trim();
            // Casing is preserved because the value may be a file name, and file names
            // are case-sensitive on some platforms.
            string strValue = strLine.Substring(idxSeparator + 1).Trim();

            if (strKey == WORDEMBEDDING_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_RAW_FILENAME)
            {
                if (m_WordEmbedding != null)
                {
                    throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                }

                Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
            }
            else if (strKey == TFEATURE_FILENAME)
            {
                Logger.WriteLine("Loading template feature set...");
                m_TFeaturizer = new TemplateFeaturizer(strValue);
            }
            else if (strKey == WORDEMBEDDING_COLUMN)
            {
                m_WordEmbeddingCloumn = int.Parse(strValue);
                Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
            }
            else if (strKey == TFEATURE_WEIGHT_TYPE)
            {
                Logger.WriteLine("TFeature weighting type: {0}", strValue);

                // Case-insensitive ordinal comparison replaces the former ToLower() call,
                // whose result depends on the current culture.
                bool isBinary = strValue.Equals("binary", StringComparison.OrdinalIgnoreCase);
                m_TFeatureWeightType = isBinary
                    ? TFEATURE_WEIGHT_TYPE_ENUM.BINARY
                    : TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
            }
            else
            {
                // Any other key is a feature name with comma-separated integer values;
                // repeated keys keep appending to the same list.
                List<int> featureValues;
                if (m_FeatureConfiguration.TryGetValue(strKey, out featureValues) == false)
                {
                    featureValues = new List<int>();
                    m_FeatureConfiguration.Add(strKey, featureValues);
                }

                foreach (string value in strValue.Split(','))
                {
                    featureValues.Add(int.Parse(value));
                }
            }
        }
    }
}