/// <summary>
/// Reads a dictionary text file and groups the multi-syllable headwords it
/// contains under their normalized pinyin, separately for the simplified
/// and the traditional form.
/// </summary>
/// <param name="fn">Path of the dictionary file to read.</param>
/// <param name="pinyin">Syllable checker; every syllable of an entry must pass <c>IsNumSyllable</c>.</param>
public PolyDict(string fn, Pinyin pinyin)
{
    // Sample input line:
    // 玩意兒 玩意儿 [wan2 yi4 r5] /erhua variant of 玩意[wan2 yi4]/
    // Group 1 is the first headword (used as traditional), group 2 the second
    // (used as simplified), group 3 the bracketed pinyin.
    var entryPattern = new Regex(@"^([^ ]+) ([^ ]+) \[([^\]]+)\]");
    using (var reader = new StreamReader(fn))
    {
        while (true)
        {
            string line = reader.ReadLine();
            if (line == null)
            {
                break;
            }

            var match = entryPattern.Match(line);
            if (!match.Success)
            {
                continue;
            }

            // Normalize the reading: "u:" becomes "v", the digit 5 is dropped, all lower case.
            string key = match.Groups[3].Value
                .Replace("u:", "v")
                .Replace("5", "")
                .ToLowerInvariant();
            string[] syllables = key.Split(' ');

            // Every syllable is checked, even after the first failure.
            bool allSyllablesValid = true;
            for (int i = 0; i < syllables.Length; ++i)
            {
                if (!pinyin.IsNumSyllable(syllables[i]))
                {
                    allSyllablesValid = false;
                }
            }
            if (!allSyllablesValid)
            {
                continue;
            }

            string traditional = match.Groups[1].Value;
            string simplified = match.Groups[2].Value;

            // Keep only entries with more than one syllable where the syllable
            // count equals the length of both headwords.
            bool lengthsAgree = syllables.Length == traditional.Length
                && traditional.Length == simplified.Length;
            if (!lengthsAgree || syllables.Length == 1)
            {
                continue;
            }

            if (!dictSimp.ContainsKey(key))
            {
                dictSimp[key] = new List<string>();
            }
            dictSimp[key].Add(simplified);

            if (!dictTrad.ContainsKey(key))
            {
                dictTrad[key] = new List<string>();
            }
            dictTrad[key].Add(traditional);
        }
    }
}
/// <summary>
/// Reads a dictionary text file and fills two kinds of lookup tables:
/// headwords grouped by normalized pinyin (<c>DictSimp</c> / <c>DictTrad</c>),
/// and per-character readings (<c>charReadingsSimp</c> / <c>charReadingsTrad</c>).
/// </summary>
/// <param name="fn">Path of the dictionary file to read.</param>
/// <param name="pinyin">Syllable checker; every syllable of an entry must pass <c>IsNumSyllable</c>.</param>
public PolyDict(string fn, Pinyin pinyin)
{
    // Sample input line:
    // 玩意兒 玩意儿 [wan2 yi4 r5] /erhua variant of 玩意[wan2 yi4]/
    // Group 1 is the first headword (used as traditional), group 2 the second
    // (used as simplified), group 3 the bracketed pinyin.
    var re = new Regex(@"^([^ ]+) ([^ ]+) \[([^\]]+)\]");
    using (var sr = new StreamReader(fn))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            var m = re.Match(line);
            if (!m.Success)
            {
                continue;
            }

            // Normalize the reading: "u:" becomes "v", the digit 5 is dropped, all lower case.
            string pinyinStr = m.Groups[3].Value;
            pinyinStr = pinyinStr.Replace("u:", "v").Replace("5", "").ToLowerInvariant();
            var sylls = pinyinStr.Split(' ');

            // Split both headwords into units via asUniChars.
            // NOTE(review): presumably one string per Unicode character, so that
            // surrogate pairs stay together - confirm against asUniChars.
            var usimp = new List<string>();
            var utrad = new List<string>();
            foreach (string chr in asUniChars(m.Groups[2].Value))
            {
                usimp.Add(chr);
            }
            foreach (string chr in asUniChars(m.Groups[1].Value))
            {
                utrad.Add(chr);
            }

            // Reject early: the cheap count comparison first, then the per-item
            // checks, stopping at the first failure. Nothing is stored for a
            // rejected entry, so no further work is needed for it.
            if (sylls.Length != utrad.Count || utrad.Count != usimp.Count)
            {
                continue;
            }
            bool valid = true;
            foreach (var syll in sylls)
            {
                if (!pinyin.IsNumSyllable(syll))
                {
                    valid = false;
                    break;
                }
            }
            if (valid)
            {
                // Only the traditional form is checked with isHanzi.
                foreach (string ts in utrad)
                {
                    if (!isHanzi(ts))
                    {
                        valid = false;
                        break;
                    }
                }
            }
            if (!valid)
            {
                continue;
            }

            // The buckets are created for every accepted entry, including
            // single-unit ones that add no word below; this is kept so the set
            // of keys stays the same as before.
            if (!DictSimp.ContainsKey(pinyinStr))
            {
                DictSimp[pinyinStr] = new List<string>();
            }
            if (!DictTrad.ContainsKey(pinyinStr))
            {
                DictTrad[pinyinStr] = new List<string>();
            }

            // Words of two or more units are stored with the units separated by
            // single spaces. The joined strings are built only here, where they
            // are actually needed.
            if (usimp.Count > 1)
            {
                DictSimp[pinyinStr].Add(string.Join(" ", usimp.ToArray()));
                DictTrad[pinyinStr].Add(string.Join(" ", utrad.ToArray()));
            }

            // Record the i-th syllable as a reading of the i-th unit of each form.
            // Duplicates are not filtered out.
            for (int i = 0; i < sylls.Length; ++i)
            {
                if (!charReadingsSimp.ContainsKey(usimp[i]))
                {
                    charReadingsSimp[usimp[i]] = new List<string>();
                }
                if (!charReadingsTrad.ContainsKey(utrad[i]))
                {
                    charReadingsTrad[utrad[i]] = new List<string>();
                }
                charReadingsSimp[usimp[i]].Add(sylls[i]);
                charReadingsTrad[utrad[i]].Add(sylls[i]);
            }
        }
    }
}