Example #1
0
        /// <summary>
        /// Builds a tokenizer configuration from a serialized key/value string.
        /// </summary>
        /// <remarks>
        /// Recognized keys are "TokenizerType", "StopWordFile", "AddStopWordsFile" and
        /// "UserDictFile"; any other key is ignored. "StopWordFile" is stored exactly as
        /// given, while an empty "AddStopWordsFile" or "UserDictFile" value is stored as
        /// null. <c>Initialize()</c> runs once after all settings have been applied.
        /// </remarks>
        /// <param name="configStr">Serialized settings, parsed with
        /// <c>StringOperations.ParseStringStringDictionary</c>.</param>
        public TokenizeConfig(string configStr)
        {
            var settings = StringOperations.ParseStringStringDictionary(configStr);

            foreach (var setting in settings)
            {
                var name  = setting.Key;
                var value = setting.Value;

                if (name == "TokenizerType")
                {
                    TokenizerType = (TokenizerType)StringOperations.ParseEnum(typeof(TokenizerType), value);
                }
                else if (name == "StopWordFile")
                {
                    // Kept verbatim, including an empty string.
                    StopWordFile = value;
                }
                else if (name == "AddStopWordsFile")
                {
                    // Empty means "not configured".
                    AddStopWordsFile = value.Length > 0 ? value : null;
                }
                else if (name == "UserDictFile")
                {
                    // Empty means "not configured".
                    UserDictFile = value.Length > 0 ? value : null;
                }
            }

            Initialize();
        }
        /// <summary>
        /// Concatenates selected fields of a document into one space-separated string,
        /// repeating each field's text as many times as its weight.
        /// </summary>
        /// <param name="doc">Document whose fields are read with <c>doc.Get(fieldName)</c>.</param>
        /// <param name="fieldWeightDict">Maps a field name to the number of times that
        /// field's text is repeated in the result. Weights of zero or less contribute nothing.</param>
        /// <param name="leadingSentencesCnt">Optional map from field name to the count passed to
        /// <c>StringOperations.GetLeadingSentences</c>; fields that are not listed are used unmodified.</param>
        /// <returns>
        /// The repeated field texts joined by single spaces, or an empty string when no
        /// field contributes anything.
        /// </returns>
        public static string GetDocumentContent(Document doc, Dictionary <string, int> fieldWeightDict, Dictionary <string, int> leadingSentencesCnt = null)
        {
            var pieces = new List <string>();

            foreach (var kvp in fieldWeightDict)
            {
                var val = doc.Get(kvp.Key);

                // Single lookup instead of ContainsKey followed by the indexer.
                int sentenceCnt;
                if (leadingSentencesCnt != null && leadingSentencesCnt.TryGetValue(kvp.Key, out sentenceCnt))
                {
                    val = StringOperations.GetLeadingSentences(val, sentenceCnt);
                }

                for (int i = 0; i < kvp.Value; i++)
                {
                    pieces.Add(val);
                }
            }

            // Joining avoids quadratic string concatenation and, unlike trimming a
            // trailing space with Substring, is safe when no piece was collected.
            // Null pieces are rendered as empty strings, as concatenation did.
            return(string.Join(" ", pieces.ToArray()));
        }
Example #3
0
        /// <summary>
        /// Parses a two-level delimited string into a key/value dictionary.
        /// </summary>
        /// <remarks>
        /// The input is first split into entries on <paramref name="level1Seperators"/>
        /// (empty entries are dropped); each entry is then split on
        /// <paramref name="level2Seperator"/> into a key and a value. When the same key
        /// appears more than once, the last entry wins.
        /// </remarks>
        /// <param name="str">The serialized dictionary.</param>
        /// <param name="level1Seperators">Characters that separate one entry from the next.</param>
        /// <param name="level2Seperator">Character that separates a key from its value.</param>
        /// <returns>
        /// The parsed dictionary. An entry that contains no <paramref name="level2Seperator"/>
        /// is stored with an empty string as its value.
        /// </returns>
        public static Dictionary <string, string> ParseStringStringDictionary(string str,
                                                                              char[] level1Seperators, char level2Seperator)
        {
            var seperator2 = new char[] { level2Seperator };
            var tokens1    = str.Split(level1Seperators, StringSplitOptions.RemoveEmptyEntries);
            var dict       = new Dictionary <string, string>();

            foreach (var token1 in tokens1)
            {
                // Split always yields at least one element, so tokens2[0] is safe.
                var tokens2 = token1.Split(seperator2);
                if (tokens2.Length > 2)
                {
                    // The value itself contained the separator; hand everything after the
                    // key to GetMergedString (presumably re-joins with the separator -
                    // TODO confirm against its definition).
                    dict[tokens2[0]] = StringOperations.GetMergedString(tokens2.Skip(1).ToList(), level2Seperator);
                }
                else if (tokens2.Length == 2)
                {
                    dict[tokens2[0]] = tokens2[1];
                }
                else
                {
                    // Key without a separator: previously this indexed past the end of
                    // tokens2 and threw IndexOutOfRangeException. Treat it as a key with
                    // an empty value, the same result "key<sep>" already produces.
                    dict[tokens2[0]] = string.Empty;
                }
            }
            return(dict);
        }
Example #4
0
        /// <summary>
        /// Loads tokenizer settings from a list of "name&lt;separator&gt;value" strings
        /// and then calls <c>Initialize()</c>.
        /// </summary>
        /// <remarks>
        /// Each string is split on <paramref name="seperator4"/> with empty parts removed.
        /// The first part is the parameter name and the second, when present, is its value;
        /// a missing value is passed on as null and any further parts are ignored.
        /// Strings that yield no parts are skipped.
        /// </remarks>
        /// <param name="configStrs">The configuration lines to apply, in order.</param>
        /// <param name="seperator4">Separator between a parameter name and its value.</param>
        /// <exception cref="ArgumentException">A line names a parameter other than
        /// "TokenizerType", "StopWordFile", "AddStopWordsFile" or "UserDictFile".</exception>
        public void Get(List <string> configStrs, string seperator4 = "\t")
        {
            foreach (var configStr in configStrs)
            {
                var tokens = configStr.Split(new string[] { seperator4 }, StringSplitOptions.RemoveEmptyEntries);
                if (tokens.Length < 1)
                {
                    // Blank line or separators only: nothing to apply.
                    continue;
                }
                var paraName = tokens[0];
                var paraVal  = tokens.Length >= 2 ? tokens[1] : null;
                switch (paraName)
                {
                case "TokenizerType":
                    TokenizerType = (TokenizerType)StringOperations.ParseEnum(typeof(TokenizerType), paraVal);
                    break;

                case "StopWordFile":
                    StopWordFile = paraVal;
                    break;

                case "AddStopWordsFile":
                    AddStopWordsFile = paraVal;
                    break;

                case "UserDictFile":
                    UserDictFile = paraVal;
                    break;

                default:
                    // Same exception type as before, but now it says which parameter
                    // was rejected so a bad config line can be located.
                    throw new ArgumentException("Unrecognized tokenizer config parameter: " + paraName, "configStrs");
                }
            }

            Initialize();
        }
Example #5
0
 /// <summary>
 /// Generic convenience overload: parses <paramref name="name"/> through
 /// <c>StringOperations.ParseEnum(typeof(T), name)</c> and casts the result to
 /// <typeparamref name="T"/>.
 /// </summary>
 /// <typeparam name="T">Type the parsed value is cast to.</typeparam>
 /// <param name="name">Text to parse.</param>
 /// <returns>The parsed value as <typeparamref name="T"/>.</returns>
 public static T ParseEnum <T>(string name)
 {
     var targetType = typeof(T);
     var parsed     = StringOperations.ParseEnum(targetType, name);

     return((T)parsed);
 }
Example #6
0
 /// <summary>
 /// Converts a date/time string to a <see cref="DateTime"/> by delegating to
 /// <c>StringOperations.ParseDateTimeStringSystem</c> with this type's
 /// <c>TimeFormat</c>.
 /// </summary>
 /// <param name="dateTimeString">Text to convert.</param>
 /// <returns>The value produced by the underlying parser.</returns>
 public static DateTime GetDateTimeByString(string dateTimeString)
 {
     var parsed = StringOperations.ParseDateTimeStringSystem(dateTimeString, TimeFormat);

     return(parsed);
 }