Example #1
0
        /// <summary>
        /// Reads the template-feature (TFeature) settings from <c>config</c> and applies them.
        /// </summary>
        /// <remarks>
        /// When the optional TFEATURE_FILENAME value is null or empty, only a log message is
        /// written and nothing else changes. Otherwise the template featurizer is created from
        /// the resolved file path, the weight type is selected ("binary", compared ignoring case,
        /// selects BINARY; any other value selects FREQUENCY), and the comma-separated context
        /// offsets are parsed and stored in <c>featureContext</c> under TFEATURE_CONTEXT.
        /// </remarks>
        private void SetTFeatures()
        {
            string tfeatureFileName = config.GetValueOptional(TFEATURE_FILENAME);
            if (String.IsNullOrEmpty(tfeatureFileName))
            {
                // Nothing configured: report it and leave the current state untouched.
                Logger.WriteLine($"No TFeature available.");
                return;
            }

            // Build the featurizer from the template file resolved against the current directory.
            var templatePath = GetFilePath(currentDirectory, tfeatureFileName);
            Logger.WriteLine($"Loading template feature set from {templatePath}");
            tFeaturizer = new TemplateFeaturizer(templatePath);

            // Pick the weighting scheme; the raw configured text is what gets logged.
            var weightTypeName = config.GetValueRequired(TFEATURE_WEIGHT_TYPE);
            if (weightTypeName.Equals("binary", StringComparison.InvariantCultureIgnoreCase))
            {
                tFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
            }
            else
            {
                tFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
            }
            Logger.WriteLine($"TFeature weight type: {weightTypeName}");

            // Register the offset list first, then fill it one parsed token at a time.
            var contextSpec = config.GetValueRequired(TFEATURE_CONTEXT);
            var offsets = new List <int>();
            featureContext.Add(TFEATURE_CONTEXT, offsets);
            foreach (var token in contextSpec.Split(','))
            {
                offsets.Add(int.Parse(token));
            }
            Logger.WriteLine($"TFeature context: {contextSpec}");
        }
Example #2
0
        /// <summary>
        /// Loads the feature configuration from a text file made of "key: value" lines.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines starting with '#' are skipped. The keys WORDEMBEDDING_FILENAME,
        /// WORDEMBEDDING_RAW_FILENAME, TFEATURE_FILENAME, WORDEMBEDDING_COLUMN and
        /// TFEATURE_WEIGHT_TYPE are handled specially. Every other key is treated as a
        /// comma-separated list of integers that is appended to m_FeatureConfiguration
        /// under that key. Only the first ':' on a line separates key from value, so values
        /// may themselves contain ':' (for example a rooted Windows path), and values keep
        /// their original letter case.
        /// </remarks>
        /// <param name="strFileName">Path of the configuration file to read.</param>
        /// <exception cref="ArgumentException">
        /// An embedding model is configured while one has already been loaded.
        /// </exception>
        /// <exception cref="FormatException">
        /// A line has no ':' separator, or a value expected to be an integer cannot be parsed.
        /// </exception>
        public void LoadFeatureConfigFromFile(string strFileName)
        {
            // 'using' closes the reader even when a line fails to load or parse.
            using (StreamReader sr = new StreamReader(strFileName))
            {
                string strLine = null;

                m_FeatureConfiguration = new Dictionary <string, List <int> >();
                while ((strLine = sr.ReadLine()) != null)
                {
                    strLine = strLine.Trim();
                    if (strLine.Length == 0)
                    {
                        //Empty line, ignore it
                        continue;
                    }

                    if (strLine.StartsWith("#") == true)
                    {
                        //Comments line, ignore it
                        continue;
                    }

                    // Split on the first ':' only, so the value may contain further colons.
                    int idxSeparator = strLine.IndexOf(':');
                    if (idxSeparator < 0)
                    {
                        throw new FormatException(String.Format("Invalid configuration line, expected \"key: value\": {0}", strLine));
                    }

                    string strKey   = strLine.Substring(0, idxSeparator).Trim();
                    string strValue = strLine.Substring(idxSeparator + 1).Trim();

                    if (strKey == WORDEMBEDDING_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_RAW_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
                    }
                    else if (strKey == TFEATURE_FILENAME)
                    {
                        Logger.WriteLine("Loading template feature set...");
                        m_TFeaturizer = new TemplateFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_COLUMN)
                    {
                        m_WordEmbeddingCloumn = int.Parse(strValue);
                        Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
                    }
                    else if (strKey == TFEATURE_WEIGHT_TYPE)
                    {
                        Logger.WriteLine("TFeature weighting type: {0}", strValue);

                        // Ordinal, case-insensitive match: independent of the current culture.
                        if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                        }
                        else
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                        }
                    }
                    else
                    {
                        // Any other key holds a comma-separated list of integers.
                        List <int> contextValues;
                        if (m_FeatureConfiguration.TryGetValue(strKey, out contextValues) == false)
                        {
                            contextValues = new List <int>();
                            m_FeatureConfiguration.Add(strKey, contextValues);
                        }

                        foreach (string value in strValue.Split(','))
                        {
                            contextValues.Add(int.Parse(value));
                        }
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Loads the feature configuration from a text file made of "key: value" lines.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines starting with '#' are skipped. The keys
        /// PRETRAINEDMODEL_FILENAME, PRETRAINEDMODEL_RAW_FILENAME, TFEATURE_FILENAME,
        /// PRETRAINEDMODEL_COLUMN, TFEATURE_WEIGHT_TYPE, PRETRAIN_TYPE,
        /// AUTOENCODER_FEATURECONFIG and AUTOENCODER_MODEL are handled specially. Every other
        /// key is treated as a comma-separated list of integers that is appended to
        /// FeatureContext under that key. Only the first ':' on a line separates key from
        /// value, so values may themselves contain ':'.
        /// </remarks>
        /// <param name="strFileName">Path of the configuration file to read.</param>
        /// <exception cref="ArgumentException">
        /// A pretrained model is configured while one has already been loaded.
        /// </exception>
        /// <exception cref="FormatException">
        /// A line has no ':' separator, or a value expected to be an integer cannot be parsed.
        /// </exception>
        public void LoadFeatureConfigFromFile(string strFileName)
        {
            // 'using' closes the reader even when a line fails to load or parse.
            using (StreamReader sr = new StreamReader(strFileName))
            {
                string strLine = null;

                FeatureContext = new Dictionary <string, List <int> >();
                while ((strLine = sr.ReadLine()) != null)
                {
                    strLine = strLine.Trim();
                    if (strLine.Length == 0)
                    {
                        //Empty line, ignore it
                        continue;
                    }

                    if (strLine.StartsWith("#") == true)
                    {
                        //Comments line, ignore it
                        continue;
                    }

                    int idxSeparator = strLine.IndexOf(':');
                    if (idxSeparator < 0)
                    {
                        // Without this check Substring(0, -1) would fail with an unhelpful error.
                        throw new FormatException(String.Format("Invalid configuration line, expected \"key: value\": {0}", strLine));
                    }

                    string strKey   = strLine.Substring(0, idxSeparator).Trim();
                    string strValue = strLine.Substring(idxSeparator + 1).Trim();

                    if (strKey == PRETRAINEDMODEL_FILENAME)
                    {
                        if (PretainedModel != null)
                        {
                            throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading pretrained dense feature set from model {0}", strValue);
                        PretainedModel = new WordEMWrapFeaturizer(strValue);
                    }
                    else if (strKey == PRETRAINEDMODEL_RAW_FILENAME)
                    {
                        if (PretainedModel != null)
                        {
                            throw new ArgumentException("Static pretrained model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading pretrained dense feature set from model {0} in text format", strValue);
                        PretainedModel = new WordEMWrapFeaturizer(strValue, true);
                    }
                    else if (strKey == TFEATURE_FILENAME)
                    {
                        Logger.WriteLine("Loading template feature set...");
                        TFeaturizer = new TemplateFeaturizer(strValue);
                    }
                    else if (strKey == PRETRAINEDMODEL_COLUMN)
                    {
                        PretrainedModelColumn = int.Parse(strValue);
                        Logger.WriteLine("Pretrained model feature column: {0}", PretrainedModelColumn);
                    }
                    else if (strKey == TFEATURE_WEIGHT_TYPE)
                    {
                        Logger.WriteLine("TFeature weighting type: {0}", strValue);

                        // Ignore case so "Binary"/"BINARY" are not silently read as FREQUENCY.
                        if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                        {
                            TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                        }
                        else
                        {
                            TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                        }
                    }
                    else if (strKey == PRETRAIN_TYPE)
                    {
                        // Any value other than the AUTOENCODER name selects EMBEDDING.
                        if (strValue.Equals(RNNSharp.PRETRAIN_TYPE.AUTOENCODER.ToString(), StringComparison.InvariantCultureIgnoreCase))
                        {
                            preTrainType = RNNSharp.PRETRAIN_TYPE.AUTOENCODER;
                        }
                        else
                        {
                            preTrainType = RNNSharp.PRETRAIN_TYPE.EMBEDDING;
                        }

                        Logger.WriteLine("Pretrain type: {0}", preTrainType);
                    }
                    else if (strKey == AUTOENCODER_FEATURECONFIG)
                    {
                        autoEncoderFeatureConfigFile = strValue;
                        Logger.WriteLine("Auto encoder configuration file: {0}", autoEncoderFeatureConfigFile);
                    }
                    else if (strKey == AUTOENCODER_MODEL)
                    {
                        autoEncoderModelFile = strValue;
                        Logger.WriteLine("Auto encoder model file: {0}", autoEncoderModelFile);
                    }
                    else
                    {
                        // Any other key holds a comma-separated list of integers.
                        List <int> contextValues;
                        if (FeatureContext.TryGetValue(strKey, out contextValues) == false)
                        {
                            contextValues = new List <int>();
                            FeatureContext.Add(strKey, contextValues);
                        }

                        foreach (string value in strValue.Split(','))
                        {
                            contextValues.Add(int.Parse(value));
                        }
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Loads the feature configuration from a text file made of "key: value" lines.
        /// </summary>
        /// <remarks>
        /// Blank lines and lines starting with '#' are skipped. The keys WORDEMBEDDING_FILENAME,
        /// WORDEMBEDDING_RAW_FILENAME, TFEATURE_FILENAME, WORDEMBEDDING_COLUMN and
        /// TFEATURE_WEIGHT_TYPE are handled specially. Every other key is treated as a
        /// comma-separated list of integers that is appended to m_FeatureConfiguration
        /// under that key. Only the first ':' on a line separates key from value, so values
        /// may themselves contain ':' (for example a rooted Windows path), and values keep
        /// their original letter case.
        /// </remarks>
        /// <param name="strFileName">Path of the configuration file to read.</param>
        /// <exception cref="ArgumentException">
        /// An embedding model is configured while one has already been loaded.
        /// </exception>
        /// <exception cref="FormatException">
        /// A line has no ':' separator, or a value expected to be an integer cannot be parsed.
        /// </exception>
        public void LoadFeatureConfigFromFile(string strFileName)
        {
            // 'using' closes the reader even when a line fails to load or parse.
            using (StreamReader sr = new StreamReader(strFileName))
            {
                string strLine = null;

                m_FeatureConfiguration = new Dictionary<string, List<int>>();
                while ((strLine = sr.ReadLine()) != null)
                {
                    strLine = strLine.Trim();
                    if (strLine.Length == 0)
                    {
                        //Empty line, ignore it
                        continue;
                    }

                    if (strLine.StartsWith("#") == true)
                    {
                        //Comments line, ignore it
                        continue;
                    }

                    // Split on the first ':' only, so the value may contain further colons.
                    int idxSeparator = strLine.IndexOf(':');
                    if (idxSeparator < 0)
                    {
                        throw new FormatException(String.Format("Invalid configuration line, expected \"key: value\": {0}", strLine));
                    }

                    string strKey = strLine.Substring(0, idxSeparator).Trim();
                    string strValue = strLine.Substring(idxSeparator + 1).Trim();

                    if (strKey == WORDEMBEDDING_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0}", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_RAW_FILENAME)
                    {
                        if (m_WordEmbedding != null)
                        {
                            throw new ArgumentException("Embedding model has already been loaded. Please check if settings is duplicated in configuration file.");
                        }
                        Logger.WriteLine("Loading embedding feature set from model {0} in text format", strValue);
                        m_WordEmbedding = new WordEMWrapFeaturizer(strValue, true);
                    }
                    else if (strKey == TFEATURE_FILENAME)
                    {
                        Logger.WriteLine("Loading template feature set...");
                        m_TFeaturizer = new TemplateFeaturizer(strValue);
                    }
                    else if (strKey == WORDEMBEDDING_COLUMN)
                    {
                        m_WordEmbeddingCloumn = int.Parse(strValue);
                        Logger.WriteLine("Word embedding feature column: {0}", m_WordEmbeddingCloumn);
                    }
                    else if (strKey == TFEATURE_WEIGHT_TYPE)
                    {
                        Logger.WriteLine("TFeature weighting type: {0}", strValue);

                        // Ordinal, case-insensitive match: independent of the current culture.
                        if (strValue.Equals("binary", StringComparison.OrdinalIgnoreCase))
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.BINARY;
                        }
                        else
                        {
                            m_TFeatureWeightType = TFEATURE_WEIGHT_TYPE_ENUM.FREQUENCY;
                        }
                    }
                    else
                    {
                        // Any other key holds a comma-separated list of integers.
                        List<int> contextValues;
                        if (m_FeatureConfiguration.TryGetValue(strKey, out contextValues) == false)
                        {
                            contextValues = new List<int>();
                            m_FeatureConfiguration.Add(strKey, contextValues);
                        }

                        foreach (string value in strValue.Split(','))
                        {
                            contextValues.Add(int.Parse(value));
                        }
                    }
                }
            }
        }