/// <summary>
/// Builds a tokenizer configuration from a configuration string.
/// The string is parsed into key/value pairs by
/// <c>StringOperations.ParseStringStringDictionary</c>; the recognized keys are
/// "TokenizerType", "StopWordFile", "AddStopWordsFile" and "UserDictFile".
/// Any other key is silently ignored. <c>Initialize()</c> runs once all pairs are applied.
/// </summary>
/// <param name="configStr">Configuration string handed to the dictionary parser.</param>
public TokenizeConfig(string configStr)
{
    var settings = StringOperations.ParseStringStringDictionary(configStr);

    foreach (var setting in settings)
    {
        var key = setting.Key;
        var value = setting.Value;

        if (key == "TokenizerType")
        {
            TokenizerType = (TokenizerType)StringOperations.ParseEnum(typeof(TokenizerType), value);
        }
        else if (key == "StopWordFile")
        {
            StopWordFile = value;
        }
        else if (key == "AddStopWordsFile")
        {
            // An empty value means "not configured" and is stored as null.
            if (value.Length > 0)
            {
                AddStopWordsFile = value;
            }
            else
            {
                AddStopWordsFile = null;
            }
        }
        else if (key == "UserDictFile")
        {
            // Same convention as AddStopWordsFile: empty becomes null.
            if (value.Length > 0)
            {
                UserDictFile = value;
            }
            else
            {
                UserDictFile = null;
            }
        }
    }

    Initialize();
}
/// <summary>
/// Assembles a single space-separated string from selected fields of a document.
/// Each field's value is appended as many times as its weight.
/// </summary>
/// <param name="doc">Document whose field values are read with <c>doc.Get(fieldName)</c>.</param>
/// <param name="fieldWeightDict">
/// Maps a field name to the number of times that field's value is repeated in the result.
/// </param>
/// <param name="leadingSentencesCnt">
/// Optional map from field name to the count passed to
/// <c>StringOperations.GetLeadingSentences</c>; when a field is listed here its value is
/// replaced by that helper's result before being repeated.
/// </param>
/// <returns>
/// The concatenated content without the trailing separator, or an empty string when
/// nothing was appended (no fields, or no positive weights).
/// </returns>
public static string GetDocumentContent(Document doc, Dictionary<string, int> fieldWeightDict, Dictionary<string, int> leadingSentencesCnt = null)
{
    // Fully qualified so the block does not depend on a "using System.Text;" directive.
    var content = new System.Text.StringBuilder();

    foreach (var kvp in fieldWeightDict)
    {
        var val = doc.Get(kvp.Key);

        int sentenceCnt;
        if (leadingSentencesCnt != null && leadingSentencesCnt.TryGetValue(kvp.Key, out sentenceCnt))
        {
            val = StringOperations.GetLeadingSentences(val, sentenceCnt);
        }

        for (int i = 0; i < kvp.Value; i++)
        {
            content.Append(val).Append(' ');
        }
    }

    // Nothing appended: the previous Substring(0, -1) call threw here.
    if (content.Length == 0)
    {
        return string.Empty;
    }

    // Drop the single trailing space added after the last value.
    content.Length -= 1;
    return content.ToString();
}
/// <summary>
/// Parses a two-level delimited string into a string-to-string dictionary.
/// The input is first split into entries on <paramref name="level1Seperators"/> (empty
/// entries are dropped); each entry is then split on <paramref name="level2Seperator"/>
/// into a key (the first piece) and a value (the rest).
/// </summary>
/// <param name="str">The delimited text to parse.</param>
/// <param name="level1Seperators">Characters that separate one key/value entry from the next.</param>
/// <param name="level2Seperator">Character that separates a key from its value inside an entry.</param>
/// <returns>
/// A dictionary with one item per distinct key; when a key occurs more than once the
/// last occurrence wins.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="FormatException">
/// An entry does not contain <paramref name="level2Seperator"/>, so it has no value part.
/// </exception>
public static Dictionary<string, string> ParseStringStringDictionary(string str, char[] level1Seperators, char level2Seperator)
{
    if (str == null)
    {
        throw new ArgumentNullException("str");
    }

    var seperator2 = new char[] { level2Seperator };
    var tokens1 = str.Split(level1Seperators, StringSplitOptions.RemoveEmptyEntries);
    var dict = new Dictionary<string, string>();

    foreach (var token1 in tokens1)
    {
        var tokens2 = token1.Split(seperator2);

        // Split returns a single element when the separator is absent; indexing [1]
        // used to fail with an IndexOutOfRangeException that did not name the entry.
        if (tokens2.Length < 2)
        {
            throw new FormatException(
                "Entry '" + token1 + "' does not contain the key/value separator '" + level2Seperator + "'.");
        }

        if (tokens2.Length > 2)
        {
            // The value itself contained the separator: hand every piece after the key
            // to GetMergedString together with the separator to rebuild the value.
            dict[tokens2[0]] = StringOperations.GetMergedString(tokens2.Skip(1).ToList(), level2Seperator);
        }
        else
        {
            dict[tokens2[0]] = tokens2[1];
        }
    }

    return dict;
}
/// <summary>
/// Applies a list of "name&lt;separator&gt;value" configuration strings to this instance
/// and then calls <c>Initialize()</c>.
/// Each string is split on <paramref name="seperator4"/> with empty pieces removed; the
/// first piece is the parameter name and the second, if present, its value (otherwise
/// null). Pieces beyond the second are ignored, and strings that yield no pieces are skipped.
/// </summary>
/// <param name="configStrs">Configuration strings, one parameter per string.</param>
/// <param name="seperator4">Separator between the parameter name and its value; tab by default.</param>
/// <exception cref="ArgumentException">
/// A parameter name is not one of "TokenizerType", "StopWordFile", "AddStopWordsFile"
/// or "UserDictFile".
/// </exception>
public void Get(List<string> configStrs, string seperator4 = "\t")
{
    foreach (var configStr in configStrs)
    {
        var tokens = configStr.Split(new string[] { seperator4 }, StringSplitOptions.RemoveEmptyEntries);
        if (tokens.Length < 1)
        {
            continue;
        }

        var paraName = tokens[0];
        var paraVal = tokens.Length >= 2 ? tokens[1] : null;

        switch (paraName)
        {
            case "TokenizerType":
                // NOTE(review): paraVal is null when the line has no value; how ParseEnum
                // reacts to null is not visible here - confirm.
                TokenizerType = (TokenizerType)StringOperations.ParseEnum(typeof(TokenizerType), paraVal);
                break;
            case "StopWordFile":
                StopWordFile = paraVal;
                break;
            case "AddStopWordsFile":
                AddStopWordsFile = paraVal;
                break;
            case "UserDictFile":
                UserDictFile = paraVal;
                break;
            default:
                // Name the offending parameter so a bad config line can be located.
                throw new ArgumentException(
                    "Unknown tokenizer config parameter '" + paraName + "'.", "configStrs");
        }
    }

    Initialize();
}
/// <summary>
/// Generic convenience overload: passes <c>typeof(T)</c> and <paramref name="name"/> to
/// <c>StringOperations.ParseEnum(Type, string)</c> and casts the result to
/// <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type the parsed result is cast to.</typeparam>
/// <param name="name">Text handed to the non-generic parser.</param>
/// <returns>The parsed value cast to <typeparamref name="T"/>.</returns>
public static T ParseEnum<T>(string name)
{
    var targetType = typeof(T);
    var parsed = StringOperations.ParseEnum(targetType, name);
    return (T)parsed;
}
/// <summary>
/// Converts a date/time string to a <see cref="DateTime"/> by delegating to
/// <c>StringOperations.ParseDateTimeStringSystem</c> with the <c>TimeFormat</c> member
/// (declared outside this method) as the format argument.
/// </summary>
/// <param name="dateTimeString">Text to convert.</param>
/// <returns>The value produced by <c>ParseDateTimeStringSystem</c>.</returns>
public static DateTime GetDateTimeByString(string dateTimeString)
{
    var parsed = StringOperations.ParseDateTimeStringSystem(dateTimeString, TimeFormat);
    return parsed;
}