Exemplo n.º 1
0
        /// <summary>
        /// Reads a dictionary file and groups its multi-syllable words by their
        /// normalized pinyin reading.
        /// </summary>
        /// <remarks>
        /// A usable line starts with <c>TRAD SIMP [syll1 syll2 ...]</c>; lines that do not match
        /// are ignored. The bracketed reading is normalized ("u:" is replaced by "v", the digit
        /// "5" is removed, the result is lower-cased) and used as the key of both
        /// <c>dictSimp</c> and <c>dictTrad</c>. An entry is stored only when every syllable is
        /// accepted by <c>pinyin.IsNumSyllable</c>, there is more than one syllable, and the
        /// syllable count equals the length of both written forms.
        /// </remarks>
        /// <param name="fn">Path of the dictionary file to read.</param>
        /// <param name="pinyin">Validator used to accept or reject each syllable.</param>
        public PolyDict(string fn, Pinyin pinyin)
        {
            // Example line:
            // 玩意兒 玩意儿 [wan2 yi4 r5] /erhua variant of 玩意[wan2 yi4]/
            var re = new Regex(@"^([^ ]+) ([^ ]+) \[([^\]]+)\]");

            using (var sr = new StreamReader(fn))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    var m = re.Match(line);
                    if (!m.Success)
                    {
                        continue;
                    }

                    // Normalize the reading; the normalized string is the dictionary key.
                    string pinyinStr = m.Groups[3].Value
                        .Replace("u:", "v")
                        .Replace("5", "")
                        .ToLowerInvariant();
                    string[] sylls = pinyinStr.Split(' ');

                    // Every syllable is checked (no early exit), matching the original call pattern.
                    bool skip = false;
                    foreach (var syll in sylls)
                    {
                        skip |= !pinyin.IsNumSyllable(syll);
                    }
                    if (skip)
                    {
                        continue;
                    }

                    string trad = m.Groups[1].Value;
                    string simp = m.Groups[2].Value;

                    // Keep only multi-syllable words with one syllable per character.
                    // NOTE(review): Length counts UTF-16 code units, so a word containing a
                    // character outside the BMP never satisfies this check and is dropped.
                    if (sylls.Length != trad.Length || sylls.Length == 1 || trad.Length != simp.Length)
                    {
                        continue;
                    }

                    // Single lookup per dictionary instead of ContainsKey + two indexer reads.
                    List<string> simpWords;
                    if (!dictSimp.TryGetValue(pinyinStr, out simpWords))
                    {
                        simpWords = new List<string>();
                        dictSimp[pinyinStr] = simpWords;
                    }
                    List<string> tradWords;
                    if (!dictTrad.TryGetValue(pinyinStr, out tradWords))
                    {
                        tradWords = new List<string>();
                        dictTrad[pinyinStr] = tradWords;
                    }

                    simpWords.Add(simp);
                    tradWords.Add(trad);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads a dictionary file and fills the pinyin-keyed word lists and the
        /// per-character reading lists.
        /// </summary>
        /// <remarks>
        /// A usable line starts with <c>TRAD SIMP [syll1 syll2 ...]</c>; lines that do not match
        /// are ignored. The bracketed reading is normalized ("u:" is replaced by "v", the digit
        /// "5" is removed, the result is lower-cased). Both written forms are split with
        /// <c>asUniChars</c>. An entry is used only when the syllable count equals the element
        /// count of both forms, every syllable is accepted by <c>pinyin.IsNumSyllable</c>, and
        /// every traditional element is accepted by <c>isHanzi</c>.
        /// For an accepted entry:
        /// <list type="bullet">
        /// <item><description><c>DictSimp</c>/<c>DictTrad</c> get a key for the normalized reading;
        /// the word, written with its elements separated by single spaces, is appended only when
        /// it has more than one element.</description></item>
        /// <item><description><c>charReadingsSimp</c>/<c>charReadingsTrad</c> get the i-th syllable
        /// appended under the i-th element (duplicates are not removed).</description></item>
        /// </list>
        /// </remarks>
        /// <param name="fn">Path of the dictionary file to read.</param>
        /// <param name="pinyin">Validator used to accept or reject each syllable.</param>
        public PolyDict(string fn, Pinyin pinyin)
        {
            // Example line:
            // 玩意兒 玩意儿 [wan2 yi4 r5] /erhua variant of 玩意[wan2 yi4]/
            var re = new Regex(@"^([^ ]+) ([^ ]+) \[([^\]]+)\]");

            using (var sr = new StreamReader(fn))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    var m = re.Match(line);
                    if (!m.Success)
                    {
                        continue;
                    }

                    // Normalize the reading; the normalized string is the dictionary key.
                    string pinyinStr = m.Groups[3].Value
                        .Replace("u:", "v")
                        .Replace("5", "")
                        .ToLowerInvariant();
                    string[] sylls = pinyinStr.Split(' ');

                    // Split both written forms into the units produced by asUniChars.
                    var usimp = new List<string>();
                    foreach (string chr in asUniChars(m.Groups[2].Value))
                    {
                        usimp.Add(chr);
                    }
                    var utrad = new List<string>();
                    foreach (string chr in asUniChars(m.Groups[1].Value))
                    {
                        utrad.Add(chr);
                    }

                    // Validation. All checks are evaluated (no early exit), matching the
                    // original call pattern of IsNumSyllable and isHanzi.
                    bool skip = sylls.Length != utrad.Count || utrad.Count != usimp.Count;
                    foreach (var syll in sylls)
                    {
                        skip |= !pinyin.IsNumSyllable(syll);
                    }
                    foreach (string ts in utrad)
                    {
                        skip |= !isHanzi(ts);
                    }
                    if (skip)
                    {
                        continue;
                    }

                    // The key is registered even for single-element entries, which then
                    // leave the list untouched (possibly empty).
                    List<string> simpWords;
                    if (!DictSimp.TryGetValue(pinyinStr, out simpWords))
                    {
                        simpWords = new List<string>();
                        DictSimp[pinyinStr] = simpWords;
                    }
                    List<string> tradWords;
                    if (!DictTrad.TryGetValue(pinyinStr, out tradWords))
                    {
                        tradWords = new List<string>();
                        DictTrad[pinyinStr] = tradWords;
                    }
                    if (usimp.Count > 1)
                    {
                        // Build the space-separated forms only when they are stored, and with
                        // Join rather than repeated string concatenation.
                        simpWords.Add(string.Join(" ", usimp.ToArray()));
                        tradWords.Add(string.Join(" ", utrad.ToArray()));
                    }

                    // Record the i-th syllable as a reading of the i-th element of each form.
                    // Indexing is safe: the counts were verified equal above.
                    for (int i = 0; i < sylls.Length; ++i)
                    {
                        List<string> simpReadings;
                        if (!charReadingsSimp.TryGetValue(usimp[i], out simpReadings))
                        {
                            simpReadings = new List<string>();
                            charReadingsSimp[usimp[i]] = simpReadings;
                        }
                        List<string> tradReadings;
                        if (!charReadingsTrad.TryGetValue(utrad[i], out tradReadings))
                        {
                            tradReadings = new List<string>();
                            charReadingsTrad[utrad[i]] = tradReadings;
                        }
                        simpReadings.Add(sylls[i]);
                        tradReadings.Add(sylls[i]);
                    }
                }
            }
        }