/// <summary>
/// Creates the segmenter on top of the given dictionary and appends three
/// extra rules to <c>otherRules</c>.
/// </summary>
/// <param name="dic">Dictionary handed to the base constructor.</param>
public ComplexSeg(Dictionary dic)
    : base(dic)
{
    // Registration order is kept exactly as before:
    // largest average length, smallest variance, largest degree-of-freedom sum.
    otherRules.Add(new LargestAvgLenRule());
    otherRules.Add(new SmallestVarianceRule());
    otherRules.Add(new LargestSumDegreeFreedomRule());
}
/// <summary>
/// Destroys this instance and releases the resources it holds.
/// </summary>
/// <remarks>
/// Each table is cleared and dropped only when it is still present, so the
/// method can be called repeatedly, or on an instance whose tables were
/// never loaded, without throwing a <c>NullReferenceException</c>.
/// </remarks>
void destroy()
{
    dicPath = null;

    if (dict != null)
    {
        dict.Clear();
        dict = null;
    }

    if (unit != null)
    {
        unit.Clear();
        unit = null;
    }
}
/// <summary>
/// Builds the unit table from <paramref name="path"/> when that file exists,
/// otherwise from the "units.dic" resource.
/// </summary>
/// <param name="path">File to read; also reported in the log message.</param>
/// <returns>
/// A table keyed by every single-character line; each value is
/// <c>typeof(Dictionary)</c>.
/// </returns>
Dictionary<char, object> loadUnit(FileInfo path)
{
    string[] lines;
    if (!path.Exists)
    {
        // NOTE(review): if getLinesFromResources can return null, the loop below throws - confirm.
        lines = getLinesFromResources("units.dic");
    }
    else
    {
        lines = File.ReadAllLines(path.FullName);
        addLastTime(path);
    }

    Dictionary<char, object> table = new Dictionary<char, object>();
    long startedAt = now;
    int accepted = 0;
    foreach (string entry in lines)
    {
        if (entry.Length == 1)
        {
            // Every key maps to the same value, so assigning through the indexer
            // is equivalent to "add only if missing".
            table[entry[0]] = typeof(Dictionary);

            // Counts every single-character line, duplicates included.
            ++accepted;
        }
    }

    Log.Info("unit loaded time={0}ms,line={1},on file={2}", now - startedAt, accepted, path);
    return table;
}
/// <summary>
/// Feeds the given word lines into <paramref name="dic"/> via
/// <c>WordsLoading</c> and logs the elapsed time and the number of lines loaded.
/// </summary>
/// <param name="buffers">Word lines to load.</param>
/// <param name="dic">Table that receives the words.</param>
void loadWord(string[] buffers, Dictionary<char, CharNode> dic)
{
    long startedAt = now;
    int loaded = WordsLoading(buffers, dic);

    // The clock is read only after loading has finished, as before.
    Log.Info("words loaded time={0}ms,line={1},on file=words.dic", now - startedAt, loaded);
}
/// <summary>
/// Creates an analyzer that uses the supplied dictionary.
/// </summary>
/// <param name="p_dic">Dictionary stored in <c>dic</c>.</param>
public MMSegAnalyzer(Dictionary p_dic)
    : base()
{
    this.dic = p_dic;
}
/// <summary>
/// Adds every word in <paramref name="buffers"/> to <paramref name="dic"/>,
/// keyed by the word's first character.
/// </summary>
/// <param name="buffers">Word lines; <c>null</c> is treated as "nothing to load".</param>
/// <param name="dic">Table of head character to node; nodes are created on demand.</param>
/// <returns>Number of lines that were added (lines shorter than two characters are skipped).</returns>
static int WordsLoading(string[] buffers, Dictionary<char, CharNode> dic)
{
    if (buffers == null)
    {
        return 0;
    }

    int count = 0;
    foreach (string line in buffers)
    {
        // A word needs a head character plus at least one tail character.
        if (line == null || line.Length < 2)
        {
            continue;
        }

        char head = line[0];
        CharNode cn;
        if (!dic.TryGetValue(head, out cn) || cn == null)
        {
            cn = new CharNode();
            // Indexer assignment also covers a key that is present but mapped to null,
            // where Add() would throw.
            dic[head] = cn;
        }

        ++count;
        cn.AddWordTail(tail(line));
    }

    return count;
}
/// <summary>
/// Creates the analyzer; the dictionary is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="dic">Dictionary handed to the base class.</param>
public MaxWordAnalyzer(Dictionary dic)
    : base(dic)
{
}
/// <summary>
/// Remembers the dictionary path, starts with an empty timestamp table and
/// performs the initial load through <c>reload()</c>.
/// </summary>
/// <param name="path">Path stored in <c>dicPath</c>.</param>
void init(FileInfo path)
{
    wordsLastTime = new Dictionary<string, long>();
    dicPath = path;

    // The boolean result of reload() is deliberately not inspected here.
    reload();
}
/// <summary>
/// Creates a node for the given character with an empty child table.
/// </summary>
/// <param name="key">Character this node represents.</param>
public TreeNode(char key)
{
    subNodes = new Dictionary<char, TreeNode>();
    this.key = key;
}
/// <summary>
/// Creates the segmenter; the dictionary is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="dic">Dictionary handed to the base class.</param>
public SimpleSeg(Dictionary dic)
    : base(dic)
{
}
/// <summary>
/// Creates the segmenter; the dictionary is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="dic">Dictionary handed to the base class.</param>
public MaxWordSeg(Dictionary dic)
    : base(dic)
{
}
/// <summary>
/// Creates the analyzer; the dictionary is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="dic">Dictionary handed to the base class.</param>
public SimpleAnalyzer(Dictionary dic)
    : base(dic)
{
}
/// <summary>
/// Returns the shared dictionary for <paramref name="path"/>, creating and
/// caching it on first use.
/// </summary>
/// <remarks>
/// <c>FileInfo</c> does not override <c>Equals</c>/<c>GetHashCode</c>, so a
/// hash lookup keyed by <c>FileInfo</c> only matches the very same object.
/// The cache is therefore searched by full path, so that two
/// <c>FileInfo</c> objects naming the same file share one dictionary.
/// </remarks>
/// <param name="path">Location of the dictionary.</param>
/// <returns>The cached or newly created dictionary.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="path"/> is null.</exception>
public static Dictionary getInstance(FileInfo path)
{
    if (path == null)
    {
        throw new System.ArgumentNullException("path");
    }

    string fullName = path.FullName;
    lock (m_syncOjbect)
    {
        // Linear scan: the cache holds one entry per dictionary location.
        foreach (KeyValuePair<FileInfo, Dictionary> cached in dics)
        {
            if (string.Equals(cached.Key.FullName, fullName, System.StringComparison.Ordinal))
            {
                return cached.Value;
            }
        }

        Dictionary created = new Dictionary(path);
        dics.Add(path, created);
        return created;
    }
}
/// <summary>
/// Returns the shared dictionary for <paramref name="path"/>, creating and
/// caching it on first use. The cache key is <c>path.ToString()</c>.
/// </summary>
/// <param name="path">Location of the dictionary.</param>
/// <returns>The cached or newly created dictionary.</returns>
public static Dictionary getInstance(FileInfo path)
{
    string cacheKey = path.ToString();
    lock (m_syncOjbect)
    {
        Dictionary instance;
        if (!dics.TryGetValue(cacheKey, out instance))
        {
            instance = new Dictionary(path);
            dics.Add(cacheKey, instance);
        }

        return instance;
    }
}
/// <summary>
/// Creates an analyzer that uses the dictionary returned by the
/// parameterless <c>Dictionary.getInstance()</c>.
/// </summary>
public MMSegAnalyzer()
{
    this.dic = Dictionary.getInstance();
}
/// <summary>
/// Loads all dictionaries; if loading fails, the previous state is restored.
/// Note: while reloading, roughly twice the memory of the dictionary tree is
/// needed (the default dictionary is about 50 MB each); otherwise an
/// out-of-memory error is thrown.
/// </summary>
/// <returns><c>true</c> when the new data was loaded; <c>false</c> when loading failed and the old data was restored.</returns>
public bool reload()
{
    // Snapshot of the current state, used only for rollback.
    Dictionary<string, long> oldWordsLastTime = new Dictionary<string, long>(wordsLastTime);
    Dictionary<char, CharNode> oldDict = dict;
    Dictionary<char, object> oldUnit = unit;

    try
    {
        wordsLastTime.Clear();
        dict = loadDic(dicPath);
        unit = loadUnit(dicPath);
        lastLoadTime = now;
    }
    catch (Exception ex)
    {
        // Rollback. The failed attempt may already have put entries into
        // wordsLastTime (presumably through addLastTime - confirm), so drop them
        // first; otherwise restoring a key that is already present would throw
        // from inside this handler and leave the state half restored.
        wordsLastTime.Clear();
        foreach (KeyValuePair<string, long> entry in oldWordsLastTime)
        {
            wordsLastTime[entry.Key] = entry.Value;
        }

        dict = oldDict;
        unit = oldUnit;
        Log.Info("reload dic error! dic={0},and rollbacked.{1}", dicPath, ex);
        return false;
    }

    return true;
}
/// <summary>
/// Constructor: creates an analyzer backed by the dictionary obtained from
/// <c>Dictionary.getInstance(path)</c>.
/// </summary>
/// <param name="path">Dictionary directory.</param>
public MMSegAnalyzer(string path)
{
    this.dic = Dictionary.getInstance(path);
}
/// <summary>
/// Loads single-character entries into <paramref name="dic"/>.
/// Each usable line has the form "<c>char</c>" or "<c>char count</c>"
/// (separated by one space); empty lines and lines starting with '#' are ignored.
/// </summary>
/// <param name="buffers">Lines to read; <c>null</c> loads nothing.</param>
/// <param name="dic">Table that receives one fresh node per entry, keyed by the entry's first character. A later entry for the same character replaces the earlier one.</param>
/// <returns>Number of entries stored.</returns>
static int load(string[] buffers, Dictionary<char, CharNode> dic)
{
    if (buffers == null)
    {
        return 0;
    }

    int n = 0;
    foreach (string line in buffers)
    {
        if (string.IsNullOrEmpty(line) || line[0] == '#')
        {
            continue;
        }

        string[] w = line.Split(' ');
        if (w[0].Length == 0)
        {
            // The line starts with a space, so there is no key character to index by.
            continue;
        }

        CharNode cn = new CharNode();
        int count;
        // The degree of freedom is derived from the character frequency.
        // Unparsable counts are ignored, and so are non-positive ones, whose
        // logarithm is not a finite number and cannot be converted to int.
        if (w.Length == 2 && Int32.TryParse(w[1], out count) && count > 0)
        {
            cn.Freq = (int)(Math.Log(count) * 100);
        }

        dic[w[0][0]] = cn;
        ++n;
    }

    return n;
}
/// <summary>
/// Creates an analyzer backed by the dictionary obtained from
/// <c>Dictionary.getInstance(path)</c>.
/// </summary>
/// <param name="path">Location passed to <c>Dictionary.getInstance</c>.</param>
public MMSegAnalyzer(FileInfo path)
{
    this.dic = Dictionary.getInstance(path);
}
/// <summary>
/// Builds the character/word table.
/// Characters come from <paramref name="wordsPath"/> when it exists as a file,
/// else from "data/chars.dic" beneath it, else from the "chars.dic" resource.
/// Words are then added from the "words.dic" resource (when available) and
/// from every existing file returned by <c>listWordsFiles()</c>.
/// </summary>
/// <param name="wordsPath">Character file, or the location under which "data/chars.dic" is looked up.</param>
/// <returns>The populated table.</returns>
Dictionary<char, CharNode> loadDic(FileInfo wordsPath)
{
    if (!wordsPath.Exists)
    {
        // Path.Combine uses the platform's separator instead of a hard-coded backslash.
        wordsPath = new FileInfo(Path.Combine(Path.Combine(wordsPath.FullName, "data"), "chars.dic"));
    }

    string[] data;
    if (wordsPath.Exists)
    {
        data = File.ReadAllLines(wordsPath.FullName);
        addLastTime(wordsPath);
    }
    else
    {
        data = getLinesFromResources("chars.dic");
    }

    Dictionary<char, CharNode> dic = new Dictionary<char, CharNode>();
    long started = now;

    int lineNum = load(data, dic);
    Log.Info("chars loaded time={0}ms,line={1},on file={2}", now - started, lineNum, wordsPath);

    // Try to load the words.dic resource.
    string[] wordsDicIn = getLinesFromResources("words.dic");
    if (wordsDicIn != null)
    {
        loadWord(wordsDicIn, dic);
    }

    string[] words = listWordsFiles();
    if (words != null)
    {
        foreach (string wordsFile in words)
        {
            if (!File.Exists(wordsFile))
            {
                continue;
            }

            loadWord(File.ReadAllLines(wordsFile), dic);
            addLastTime(new FileInfo(wordsFile));
        }
    }

    Log.Info("load all dic user time={0}ms", now - started);
    return dic;
}
/// <summary>
/// Creates the analyzer; the dictionary is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="dic">Dictionary handed to the base class.</param>
public ComplexAnalyzer(Dictionary dic)
    : base(dic)
{
}