Example #1
0
        /// <summary>
        /// Reads the pre-training settings from <c>config</c> and loads the matching model:
        /// an auto-encoder when the configured type names <c>PRETRAIN_TYPE.AutoEncoder</c>
        /// (compared case-insensitively), otherwise a pretrained embedding model.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// Thrown in the embedding branch when a model file is configured but
        /// <c>preTrainedModel</c> is already set.
        /// </exception>
        private void SetPretrainedModel()
        {
            const string alreadyLoadedMessage =
                "Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.";

            var requestedType = config.GetValueRequired(PRETRAIN_TYPE);
            Logger.WriteLine("Pretrain type: {0}", requestedType);

            var autoEncoderTypeName = RNNSharp.PRETRAIN_TYPE.AutoEncoder.ToString();
            if (requestedType.Equals(autoEncoderTypeName, StringComparison.InvariantCultureIgnoreCase))
            {
                // Auto-encoder: only its own configuration file is needed here.
                preTrainType = RNNSharp.PRETRAIN_TYPE.AutoEncoder;
                var configuredAutoEncoderPath = config.GetValueRequired(AUTOENCODER_CONFIG);
                var autoEncoderConfigFilePath = GetFilePath(currentDirectory, configuredAutoEncoderPath);
                Logger.WriteLine($"Loading auto encoder model. Config file = '{autoEncoderConfigFilePath}'");
                autoEncoder = InitializeAutoEncoder(autoEncoderConfigFilePath);
                return;
            }

            // Every other value is treated as the embedding type.
            preTrainType = RNNSharp.PRETRAIN_TYPE.Embedding;

            // Optional embedding model file (loaded with the single-argument constructor).
            var configuredModelPath = config.GetValueOptional(PRETRAINEDMODEL_FILENAME);
            if (!string.IsNullOrEmpty(configuredModelPath))
            {
                var preTrainedModelFilePath = GetFilePath(currentDirectory, configuredModelPath);
                if (preTrainedModel != null)
                {
                    throw new ArgumentException(alreadyLoadedMessage);
                }

                Logger.WriteLine($"Loading pretrained embedding model: {preTrainedModelFilePath}");
                preTrainedModel = new WordEMWrapFeaturizer(preTrainedModelFilePath);
            }

            // Optional embedding model in text format. A model loaded just above also
            // counts as "already loaded", so configuring both files raises the exception.
            var configuredRawModelPath = config.GetValueOptional(PRETRAINEDMODEL_RAW_FILENAME);
            if (!string.IsNullOrEmpty(configuredRawModelPath))
            {
                var preTrainedRawModelFilePath = GetFilePath(currentDirectory, configuredRawModelPath);
                if (preTrainedModel != null)
                {
                    throw new ArgumentException(alreadyLoadedMessage);
                }

                Logger.WriteLine($"Loading pretrained embedding model {preTrainedRawModelFilePath} in text format");
                preTrainedModel = new WordEMWrapFeaturizer(preTrainedRawModelFilePath, true);
            }

            var columnText = config.GetValueRequired(PRETRAINEDMODEL_COLUMN);
            preTrainedModelColumn = int.Parse(columnText);
            Logger.WriteLine("Pretrained model feature column: {0}", preTrainedModelColumn);

            // The context setting is a comma-separated list of integer offsets. The list is
            // registered before it is filled, so a parse failure leaves a partial list behind.
            var preTrainedModelContext = config.GetValueRequired(WORDEMBEDDING_CONTEXT);
            var contextOffsets = new List<int>();
            featureContext.Add(WORDEMBEDDING_CONTEXT, contextOffsets);
            foreach (var offsetText in preTrainedModelContext.Split(','))
            {
                contextOffsets.Add(int.Parse(offsetText));
            }

            Logger.WriteLine($"Pretrained model context offset : {preTrainedModelContext}");
        }
Example #2
0
        /// <summary>
        /// Loads the feature configuration from a text file made of <c>key: value</c> lines.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines starting with <c>#</c> are ignored. Each remaining line is
        /// split at its FIRST colon, so values that contain colons themselves (for example
        /// Windows paths such as <c>C:\model.bin</c>) are kept intact. Values are used with
        /// their original casing, so file paths are not altered.
        /// Keys handled specially: <c>WORDEMBEDDING_FILENAME</c>,
        /// <c>WORDEMBEDDING_RAW_FILENAME</c>, <c>TFEATURE_FILENAME</c>,
        /// <c>WORDEMBEDDING_COLUMN</c> and <c>TFEATURE_WEIGHT_TYPE</c>. Every other key is
        /// read as a comma-separated list of integers and appended to
        /// <c>m_FeatureConfiguration</c> under that key.
        /// </remarks>
        /// <param name="strFileName">Path of the configuration file to read.</param>
        /// <exception cref="ArgumentException">
        /// A line has no <c>:</c> separator, or an embedding model is configured while
        /// <c>m_WordEmbedding</c> is already set.
        /// </exception>
        public void LoadFeatureConfigFromFile(string strFileName)
        {
            // 'using' guarantees the reader is closed even when parsing throws.
            using (StreamReader sr = new StreamReader(strFileName))
            {
                m_FeatureConfiguration = new Dictionary<string, List<int>>();

                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    strLine = strLine.Trim();
                    if (strLine.Length == 0 || strLine.StartsWith("#", StringComparison.Ordinal))
                    {
                        // Empty line or comment line, ignore it
                        continue;
                    }

                    int idxSeparator = strLine.IndexOf(':');
                    if (idxSeparator < 0)
                    {
                        throw new ArgumentException("Invalid configuration line, expected 'key: value': " + strLine);
                    }

                    string strKey = strLine.Substring(0, idxSeparator).Trim();
                    string strValue = strLine.Substring(idxSeparator + 1).Trim();

                    if (strKey == WORDEMBEDDING_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_RAW_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
                    }
                    else if (strKey == TFEATURE_FILENAME)
                    {
                        Logger.WriteLine("Loading template feature set...");
                        m_TFeaturizer = new TemplateFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_COLUMN)
                    {
                        m_WordEmbeddingCloumn = int.Parse(strValue);
                        Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
                    }
                    else if (strKey == TFEATURE_WEIGHT_TYPE)
                    {
                        Logger.WriteLine("TFeature weighting type: {0}", strValue);

                        // Ordinal, case-insensitive match: no culture-dependent lowercasing.
                        // Anything other than "binary" falls back to frequency weighting.
                        if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                        }
                        else
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                        }
                    }
                    else
                    {
                        // Generic feature entry: comma-separated integers appended under the key.
                        List<int> offsets;
                        if (m_FeatureConfiguration.TryGetValue(strKey, out offsets) == false)
                        {
                            offsets = new List<int>();
                            m_FeatureConfiguration.Add(strKey, offsets);
                        }

                        foreach (string value in strValue.Split(','))
                        {
                            offsets.Add(int.Parse(value));
                        }
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Loads the feature configuration from a text file made of <c>key: value</c> lines.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines starting with <c>#</c> are ignored. Each remaining line is
        /// split at its first colon, so the value may itself contain colons.
        /// Keys handled specially: <c>PRETRAINEDMODEL_FILENAME</c>,
        /// <c>PRETRAINEDMODEL_RAW_FILENAME</c>, <c>TFEATURE_FILENAME</c>,
        /// <c>PRETRAINEDMODEL_COLUMN</c>, <c>TFEATURE_WEIGHT_TYPE</c>, <c>PRETRAIN_TYPE</c>,
        /// <c>AUTOENCODER_FEATURECONFIG</c> and <c>AUTOENCODER_MODEL</c>. Every other key is
        /// read as a comma-separated list of integers and appended to
        /// <c>FeatureContext</c> under that key.
        /// </remarks>
        /// <param name="strFileName">Path of the configuration file to read.</param>
        /// <exception cref="ArgumentException">
        /// A line has no <c>:</c> separator, or a pretrained model is configured while
        /// <c>PretainedModel</c> is already set.
        /// </exception>
        public void LoadFeatureConfigFromFile(string strFileName)
        {
            // 'using' guarantees the reader is closed even when parsing throws.
            using (StreamReader sr = new StreamReader(strFileName))
            {
                FeatureContext = new Dictionary<string, List<int>>();

                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    strLine = strLine.Trim();
                    if (strLine.Length == 0 || strLine.StartsWith("#", StringComparison.Ordinal))
                    {
                        // Empty line or comment line, ignore it
                        continue;
                    }

                    int idxSeparator = strLine.IndexOf(':');
                    if (idxSeparator < 0)
                    {
                        // Without this check Substring(0, -1) would fail with an unhelpful message.
                        throw new ArgumentException("Invalid configuration line, expected 'key: value': " + strLine);
                    }

                    string strKey = strLine.Substring(0, idxSeparator).Trim();
                    string strValue = strLine.Substring(idxSeparator + 1).Trim();

                    if (strKey == PRETRAINEDMODEL_FILENAME)
                    {
                        if (PretainedModel != null)
                        {
                            throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading pretrained dense feature set from model {0}", strValue);
                        PretainedModel = new WordEMWrapFeaturizer(strValue);
                    }
                    else if (strKey == PRETRAINEDMODEL_RAW_FILENAME)
                    {
                        if (PretainedModel != null)
                        {
                            throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading pretrained dense feature set from model {0} in text format", strValue);
                        PretainedModel = new WordEMWrapFeaturizer(strValue, true);
                    }
                    else if (strKey == TFEATURE_FILENAME)
                    {
                        Logger.WriteLine("Loading template feature set...");
                        TFeaturizer = new TemplateFeaturizer(strValue);
                    }
                    else if (strKey == PRETRAINEDMODEL_COLUMN)
                    {
                        PretrainedModelColumn = int.Parse(strValue);
                        Logger.WriteLine("Pretrained model feature column: {0}", PretrainedModelColumn);
                    }
                    else if (strKey == TFEATURE_WEIGHT_TYPE)
                    {
                        Logger.WriteLine("TFeature weighting type: {0}", strValue);

                        // Case-insensitive, like the PRETRAIN_TYPE comparison below, so that
                        // "Binary" is not silently treated as frequency weighting.
                        if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                        {
                            TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                        }
                        else
                        {
                            TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                        }
                    }
                    else if (strKey == PRETRAIN_TYPE)
                    {
                        // Any value other than the auto-encoder name selects the embedding type.
                        if (strValue.Equals(RNNSharp.PRETRAIN_TYPE.AUTOENCODER.ToString(), StringComparison.InvariantCultureIgnoreCase))
                        {
                            preTrainType = RNNSharp.PRETRAIN_TYPE.AUTOENCODER;
                        }
                        else
                        {
                            preTrainType = RNNSharp.PRETRAIN_TYPE.EMBEDDING;
                        }

                        Logger.WriteLine("Pretrain type: {0}", preTrainType);
                    }
                    else if (strKey == AUTOENCODER_FEATURECONFIG)
                    {
                        autoEncoderFeatureConfigFile = strValue;
                        Logger.WriteLine("Auto encoder configuration file: {0}", autoEncoderFeatureConfigFile);
                    }
                    else if (strKey == AUTOENCODER_MODEL)
                    {
                        autoEncoderModelFile = strValue;
                        Logger.WriteLine("Auto encoder model file: {0}", autoEncoderModelFile);
                    }
                    else
                    {
                        // Generic feature entry: comma-separated integers appended under the key.
                        List<int> offsets;
                        if (FeatureContext.TryGetValue(strKey, out offsets) == false)
                        {
                            offsets = new List<int>();
                            FeatureContext.Add(strKey, offsets);
                        }

                        foreach (string value in strValue.Split(','))
                        {
                            offsets.Add(int.Parse(value));
                        }
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Loads the feature configuration from a text file made of <c>key: value</c> lines.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines starting with <c>#</c> are ignored. Each remaining line is
        /// split at its FIRST colon, so values that contain colons themselves (for example
        /// Windows paths such as <c>C:\model.bin</c>) are kept intact. Values are used with
        /// their original casing, so file paths are not altered.
        /// Keys handled specially: <c>WORDEMBEDDING_FILENAME</c>,
        /// <c>WORDEMBEDDING_RAW_FILENAME</c>, <c>TFEATURE_FILENAME</c>,
        /// <c>WORDEMBEDDING_COLUMN</c> and <c>TFEATURE_WEIGHT_TYPE</c>. Every other key is
        /// read as a comma-separated list of integers and appended to
        /// <c>m_FeatureConfiguration</c> under that key.
        /// </remarks>
        /// <param name="strFileName">Path of the configuration file to read.</param>
        /// <exception cref="ArgumentException">
        /// A line has no <c>:</c> separator, or an embedding model is configured while
        /// <c>m_WordEmbedding</c> is already set.
        /// </exception>
        public void LoadFeatureConfigFromFile(string strFileName)
        {
            // 'using' guarantees the reader is closed even when parsing throws.
            using (StreamReader sr = new StreamReader(strFileName))
            {
                m_FeatureConfiguration = new Dictionary<string, List<int>>();

                string strLine;
                while ((strLine = sr.ReadLine()) != null)
                {
                    strLine = strLine.Trim();
                    if (strLine.Length == 0 || strLine.StartsWith("#", StringComparison.Ordinal))
                    {
                        // Empty line or comment line, ignore it
                        continue;
                    }

                    int idxSeparator = strLine.IndexOf(':');
                    if (idxSeparator < 0)
                    {
                        throw new ArgumentException("Invalid configuration line, expected 'key: value': " + strLine);
                    }

                    string strKey = strLine.Substring(0, idxSeparator).Trim();
                    string strValue = strLine.Substring(idxSeparator + 1).Trim();

                    if (strKey == WORDEMBEDDING_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_RAW_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
                    }
                    else if (strKey == TFEATURE_FILENAME)
                    {
                        Logger.WriteLine("Loading template feature set...");
                        m_TFeaturizer = new TemplateFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_COLUMN)
                    {
                        m_WordEmbeddingCloumn = int.Parse(strValue);
                        Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
                    }
                    else if (strKey == TFEATURE_WEIGHT_TYPE)
                    {
                        Logger.WriteLine("TFeature weighting type: {0}", strValue);

                        // Ordinal, case-insensitive match: no culture-dependent lowercasing.
                        // Anything other than "binary" falls back to frequency weighting.
                        if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                        }
                        else
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                        }
                    }
                    else
                    {
                        // Generic feature entry: comma-separated integers appended under the key.
                        List<int> offsets;
                        if (m_FeatureConfiguration.TryGetValue(strKey, out offsets) == false)
                        {
                            offsets = new List<int>();
                            m_FeatureConfiguration.Add(strKey, offsets);
                        }

                        foreach (string value in strValue.Split(','))
                        {
                            offsets.Add(int.Parse(value));
                        }
                    }
                }
            }
        }